Commit 9ddded35 authored by peastman's avatar peastman
Browse files

OpenCL CustomIntegrator supports if and while blocks

parent 506908ce
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
......
...@@ -34,6 +34,9 @@ ...@@ -34,6 +34,9 @@
#include "OpenCLParameterSet.h" #include "OpenCLParameterSet.h"
#include "OpenCLSort.h" #include "OpenCLSort.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include "openmm/System.h" #include "openmm/System.h"
namespace OpenMM { namespace OpenMM {
...@@ -1202,9 +1205,10 @@ private: ...@@ -1202,9 +1205,10 @@ private:
*/ */
class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel { class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public: public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER};
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl), OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), contextParameterValues(NULL), sumBuffer(NULL), potentialEnergy(NULL), hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
kineticEnergy(NULL), uniformRandoms(NULL), randomSeed(NULL), perDofValues(NULL) { randomSeed(NULL), perDofValues(NULL) {
} }
~OpenCLIntegrateCustomStepKernel(); ~OpenCLIntegrateCustomStepKernel();
/** /**
...@@ -1268,20 +1272,21 @@ public: ...@@ -1268,20 +1272,21 @@ public:
void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values); void setPerDofVariable(ContextImpl& context, int variable, const std::vector<Vec3>& values);
private: private:
class ReorderListener; class ReorderListener;
std::string createGlobalComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const std::string& energyName); class GlobalTarget;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName); std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const std::string& forceName, const std::string& energyName);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
void recordGlobalValue(double value, GlobalTarget target);
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step);
OpenCLContext& cl; OpenCLContext& cl;
double prevStepSize, energy; double prevStepSize, energy;
float energyFloat;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters, keNeedsForce; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
OpenCLArray* globalValues; OpenCLArray* globalValues;
OpenCLArray* contextParameterValues;
OpenCLArray* sumBuffer; OpenCLArray* sumBuffer;
OpenCLArray* potentialEnergy; OpenCLArray* summedValue;
OpenCLArray* kineticEnergy;
OpenCLArray* uniformRandoms; OpenCLArray* uniformRandoms;
OpenCLArray* randomSeed; OpenCLArray* randomSeed;
std::map<int, OpenCLArray*> savedForces; std::map<int, OpenCLArray*> savedForces;
...@@ -1289,20 +1294,41 @@ private: ...@@ -1289,20 +1294,41 @@ private:
OpenCLParameterSet* perDofValues; OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat; mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat;
mutable std::vector<std::vector<cl_double> > localPerDofValuesDouble; mutable std::vector<std::vector<cl_double> > localPerDofValuesDouble;
std::vector<float> contextValuesFloat; std::vector<float> globalValuesFloat;
std::vector<double> contextValuesDouble; std::vector<double> globalValuesDouble;
std::vector<float> contextValues; std::vector<double> initialGlobalVariables;
std::vector<std::vector<cl::Kernel> > kernels; std::vector<std::vector<cl::Kernel> > kernels;
cl::Kernel randomKernel, kineticEnergyKernel, sumKineticEnergyKernel; cl::Kernel randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;
std::vector<CustomIntegrator::ComputationType> stepType; std::vector<CustomIntegrator::ComputationType> stepType;
std::vector<CustomIntegratorUtilities::Comparison> comparisons;
std::vector<std::vector<Lepton::CompiledExpression> > globalExpressions;
CompiledExpressionSet expressionSet;
std::vector<bool> needsGlobals;
std::vector<bool> needsForces; std::vector<bool> needsForces;
std::vector<bool> needsEnergy; std::vector<bool> needsEnergy;
std::vector<bool> computeBothForceAndEnergy;
std::vector<bool> invalidatesForces; std::vector<bool> invalidatesForces;
std::vector<bool> merged; std::vector<bool> merged;
std::vector<int> forceGroup; std::vector<int> forceGroupFlags;
std::vector<int> blockEnd;
std::vector<int> requiredGaussian; std::vector<int> requiredGaussian;
std::vector<int> requiredUniform; std::vector<int> requiredUniform;
std::vector<int> stepEnergyVariableIndex;
std::vector<int> globalVariableIndex;
std::vector<int> parameterVariableIndex;
int gaussianVariableIndex, uniformVariableIndex, dtVariableIndex;
std::vector<std::string> parameterNames; std::vector<std::string> parameterNames;
std::vector<GlobalTarget> stepTarget;
};
class OpenCLIntegrateCustomStepKernel::GlobalTarget {
public:
OpenCLIntegrateCustomStepKernel::GlobalTargetType type;
int variableIndex;
GlobalTarget() {
}
GlobalTarget(OpenCLIntegrateCustomStepKernel::GlobalTargetType type, int variableIndex) : type(type), variableIndex(variableIndex) {
}
}; };
/** /**
......
...@@ -5940,14 +5940,10 @@ private: ...@@ -5940,14 +5940,10 @@ private:
OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() { OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() {
if (globalValues != NULL) if (globalValues != NULL)
delete globalValues; delete globalValues;
if (contextParameterValues != NULL)
delete contextParameterValues;
if (sumBuffer != NULL) if (sumBuffer != NULL)
delete sumBuffer; delete sumBuffer;
if (potentialEnergy != NULL) if (summedValue != NULL)
delete potentialEnergy; delete summedValue;
if (kineticEnergy != NULL)
delete kineticEnergy;
if (uniformRandoms != NULL) if (uniformRandoms != NULL)
delete uniformRandoms; delete uniformRandoms;
if (randomSeed != NULL) if (randomSeed != NULL)
...@@ -5963,46 +5959,14 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus ...@@ -5963,46 +5959,14 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables(); numGlobalVariables = integrator.getNumGlobalVariables();
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
globalValues = new OpenCLArray(cl, max(1, numGlobalVariables), elementSize, "globalVariables");
sumBuffer = new OpenCLArray(cl, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer"); sumBuffer = new OpenCLArray(cl, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer");
potentialEnergy = new OpenCLArray(cl, 1, cl.getEnergyBuffer().getElementSize(), "potentialEnergy"); summedValue = new OpenCLArray(cl, 1, elementSize, "summedValue");
kineticEnergy = new OpenCLArray(cl, 1, elementSize, "kineticEnergy");
perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cl.getUseDoublePrecision() || cl.getUseMixedPrecision()); perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent)); cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent));
prevStepSize = -1.0; prevStepSize = -1.0;
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed()); SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
} }
string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator, const string& energyName) {
map<string, Lepton::ParsedExpression> expressions;
if (variable == "dt")
expressions["dt[0].y = "] = expr;
else {
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
if (variable == integrator.getGlobalVariableName(i))
expressions["globals["+cl.intToString(i)+"] = "] = expr;
for (int i = 0; i < (int) parameterNames.size(); i++)
if (variable == parameterNames[i]) {
expressions["params["+cl.intToString(i)+"] = "] = expr;
modifiesParameters = true;
}
}
if (expressions.size() == 0)
throw OpenMMException("Unknown global variable: "+variable);
map<string, string> variables;
variables["dt"] = "dt[0].y";
variables["uniform"] = "uniform";
variables["gaussian"] = "gaussian";
variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "params["+cl.intToString(i)+"]";
vector<const TabulatedFunction*> functions;
vector<pair<string, string> > functionNames;
return cl.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp");
}
string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) { string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
const string suffixes[] = {".x", ".y", ".z"}; const string suffixes[] = {".x", ".y", ".z"};
string suffix = suffixes[component]; string suffix = suffixes[component];
...@@ -6031,11 +5995,11 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va ...@@ -6031,11 +5995,11 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
if (energyName != "") if (energyName != "")
variables[energyName] = "energy"; variables[energyName] = "energy";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]"; variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(globalVariableIndex[i])+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++) for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i); variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
for (int i = 0; i < (int) parameterNames.size(); i++) for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "params["+cl.intToString(i)+"]"; variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]";
vector<const TabulatedFunction*> functions; vector<const TabulatedFunction*> functions;
vector<pair<string, string> > functionNames; vector<pair<string, string> > functionNames;
string tempType = (cl.getSupportsDoublePrecision() ? "double" : "float"); string tempType = (cl.getSupportsDoublePrecision() ? "double" : "float");
...@@ -6053,53 +6017,54 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6053,53 +6017,54 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
// Initialize various data structures. // Initialize various data structures.
const map<string, double>& params = context.getParameters(); const map<string, double>& params = context.getParameters();
if (useDouble) { for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter)
contextParameterValues = OpenCLArray::create<cl_double>(cl, max(1, (int) params.size()), "contextParameters");
contextValuesDouble.resize(contextParameterValues->getSize());
for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
contextValuesDouble[parameterNames.size()] = iter->second;
parameterNames.push_back(iter->first); parameterNames.push_back(iter->first);
}
contextParameterValues->upload(contextValuesDouble);
}
else {
contextParameterValues = OpenCLArray::create<cl_float>(cl, max(1, (int) params.size()), "contextParameters");
contextValuesFloat.resize(contextParameterValues->getSize());
for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
contextValuesFloat[parameterNames.size()] = (float) iter->second;
parameterNames.push_back(iter->first);
}
contextParameterValues->upload(contextValuesFloat);
}
kernels.resize(integrator.getNumComputations()); kernels.resize(integrator.getNumComputations());
requiredGaussian.resize(integrator.getNumComputations(), 0); requiredGaussian.resize(integrator.getNumComputations(), 0);
requiredUniform.resize(integrator.getNumComputations(), 0); requiredUniform.resize(integrator.getNumComputations(), 0);
needsForces.resize(numSteps, false); needsGlobals.resize(numSteps, false);
needsEnergy.resize(numSteps, false); globalExpressions.resize(numSteps);
forceGroup.resize(numSteps, -2); stepType.resize(numSteps);
invalidatesForces.resize(numSteps, false); stepTarget.resize(numSteps);
merged.resize(numSteps, false); merged.resize(numSteps, false);
modifiesParameters = false; modifiesParameters = false;
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize); defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize);
// Build a list of all variables that affect the forces, so we can tell which // Record information about all the computation steps.
// steps invalidate them.
set<string> affectsForce; vector<string> variable(numSteps);
affectsForce.insert("x"); vector<int> forceGroup;
for (vector<ForceImpl*>::const_iterator iter = context.getForceImpls().begin(); iter != context.getForceImpls().end(); ++iter) { vector<vector<Lepton::ParsedExpression> > expression;
const map<string, double> params = (*iter)->getDefaultParameters(); CustomIntegratorUtilities::analyzeComputations(context, integrator, expression, comparisons, blockEnd, invalidatesForces, needsForces, needsEnergy, computeBothForceAndEnergy, forceGroup);
for (map<string, double>::const_iterator param = params.begin(); param != params.end(); ++param) for (int step = 0; step < numSteps; step++) {
affectsForce.insert(param->first); string expr;
integrator.getComputationStep(step, stepType[step], variable[step], expr);
if (stepType[step] == CustomIntegrator::BeginWhileBlock)
blockEnd[blockEnd[step]] = step; // Record where to branch back to.
if (stepType[step] == CustomIntegrator::ComputeGlobal || stepType[step] == CustomIntegrator::BeginIfBlock || stepType[step] == CustomIntegrator::BeginWhileBlock)
for (int i = 0; i < (int) expression[step].size(); i++)
globalExpressions[step].push_back(expression[step][i].createCompiledExpression());
}
for (int step = 0; step < numSteps; step++) {
for (int i = 0; i < (int) globalExpressions[step].size(); i++)
expressionSet.registerExpression(globalExpressions[step][i]);
} }
// Record information about all the computation steps. // Record the indices for variables in the CompiledExpressionSet.
stepType.resize(numSteps); gaussianVariableIndex = expressionSet.getVariableIndex("gaussian");
vector<string> variable(numSteps); uniformVariableIndex = expressionSet.getVariableIndex("uniform");
vector<Lepton::ParsedExpression> expression(numSteps); dtVariableIndex = expressionSet.getVariableIndex("dt");
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
globalVariableIndex.push_back(expressionSet.getVariableIndex(integrator.getGlobalVariableName(i)));
for (int i = 0; i < (int) parameterNames.size(); i++)
parameterVariableIndex.push_back(expressionSet.getVariableIndex(parameterNames[i]));
// Record the variable names and flags for the force and energy in each step.
forceGroupFlags.resize(numSteps, -1);
vector<string> forceGroupName; vector<string> forceGroupName;
vector<string> energyGroupName; vector<string> energyGroupName;
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
...@@ -6112,41 +6077,67 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6112,41 +6077,67 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
} }
vector<string> forceName(numSteps, "f"); vector<string> forceName(numSteps, "f");
vector<string> energyName(numSteps, "energy"); vector<string> energyName(numSteps, "energy");
stepEnergyVariableIndex.resize(numSteps, expressionSet.getVariableIndex("energy"));
for (int step = 0; step < numSteps; step++) { for (int step = 0; step < numSteps; step++) {
string expr; if (needsForces[step] && forceGroup[step] > -1)
integrator.getComputationStep(step, stepType[step], variable[step], expr); forceName[step] = forceGroupName[forceGroup[step]];
if (expr.size() > 0) { if (needsEnergy[step] && forceGroup[step] > -1) {
expression[step] = Lepton::Parser::parse(expr).optimize(); energyName[step] = energyGroupName[forceGroup[step]];
if (usesVariable(expression[step], "f")) { stepEnergyVariableIndex[step] = expressionSet.getVariableIndex(energyName[step]);
needsForces[step] = true;
forceGroup[step] = -1;
} }
if (usesVariable(expression[step], "energy")) { if (forceGroup[step] > -1)
needsEnergy[step] = true; forceGroupFlags[step] = 1<<forceGroup[step];
forceGroup[step] = -1; if (forceGroupFlags[step] == -2 && step > 0)
forceGroupFlags[step] = forceGroupFlags[step-1];
if (forceGroupFlags[step] != -2 && savedForces.find(forceGroupFlags[step]) == savedForces.end())
savedForces[forceGroupFlags[step]] = new OpenCLArray(cl, cl.getForce().getSize(), cl.getForce().getElementSize(), "savedForces");
} }
for (int i = 0; i < 32; i++) {
if (usesVariable(expression[step], forceGroupName[i])) { // Allocate space for storing global values, both on the host and the device.
if (forceGroup[step] != -2)
throw OpenMMException("A single computation step cannot depend on multiple force groups"); globalValuesFloat.resize(expressionSet.getNumVariables());
needsForces[step] = true; globalValuesDouble.resize(expressionSet.getNumVariables());
forceGroup[step] = 1<<i; int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
forceName[step] = forceGroupName[i]; globalValues = new OpenCLArray(cl, expressionSet.getNumVariables(), elementSize, "globalValues");
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) {
globalValuesDouble[globalVariableIndex[i]] = initialGlobalVariables[i];
expressionSet.setVariable(globalVariableIndex[i], initialGlobalVariables[i]);
}
for (int i = 0; i < (int) parameterVariableIndex.size(); i++) {
double value = context.getParameter(parameterNames[i]);
globalValuesDouble[parameterVariableIndex[i]] = value;
expressionSet.setVariable(parameterVariableIndex[i], value);
}
// Record information about the targets of steps that will be stored in global variables.
for (int step = 0; step < numSteps; step++) {
if (stepType[step] == CustomIntegrator::ComputeGlobal || stepType[step] == CustomIntegrator::ComputeSum) {
if (variable[step] == "dt")
stepTarget[step].type = DT;
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
if (variable[step] == integrator.getGlobalVariableName(i))
stepTarget[step].type = VARIABLE;
for (int i = 0; i < (int) parameterNames.size(); i++)
if (variable[step] == parameterNames[i]) {
stepTarget[step].type = PARAMETER;
modifiesParameters = true;
} }
if (usesVariable(expression[step], energyGroupName[i])) { stepTarget[step].variableIndex = expressionSet.getVariableIndex(variable[step]);
if (forceGroup[step] != -2)
throw OpenMMException("A single computation step cannot depend on multiple force groups");
needsEnergy[step] = true;
forceGroup[step] = 1<<i;
energyName[step] = energyGroupName[i];
} }
} }
// Identify which per-DOF steps are going to require global variables or context parameters.
for (int step = 0; step < numSteps; step++) {
if (stepType[step] == CustomIntegrator::ComputePerDof || stepType[step] == CustomIntegrator::ComputeSum) {
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
if (usesVariable(expression[step][0], integrator.getGlobalVariableName(i)))
needsGlobals[step] = true;
for (int i = 0; i < (int) parameterNames.size(); i++)
if (usesVariable(expression[step][0], parameterNames[i]))
needsGlobals[step] = true;
} }
invalidatesForces[step] = (stepType[step] == CustomIntegrator::ConstrainPositions || affectsForce.find(variable[step]) != affectsForce.end());
if (forceGroup[step] == -2 && step > 0)
forceGroup[step] = forceGroup[step-1];
if (forceGroup[step] != -2 && savedForces.find(forceGroup[step]) == savedForces.end())
savedForces[forceGroup[step]] = new OpenCLArray(cl, cl.getForce().getSize(), cl.getForce().getElementSize(), "savedForces");
} }
// Determine how each step will represent the position (as just a value, or a value plus a delta). // Determine how each step will represent the position (as just a value, or a value plus a delta).
...@@ -6174,9 +6165,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6174,9 +6165,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
for (int step = 1; step < numSteps; step++) { for (int step = 1; step < numSteps; step++) {
if (needsForces[step] || needsEnergy[step]) if (needsForces[step] || needsEnergy[step])
continue; continue;
if (stepType[step-1] == CustomIntegrator::ComputeGlobal && stepType[step] == CustomIntegrator::ComputeGlobal &&
!usesVariable(expression[step], "uniform") && !usesVariable(expression[step], "gaussian"))
merged[step] = true;
if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof) if (stepType[step-1] == CustomIntegrator::ComputePerDof && stepType[step] == CustomIntegrator::ComputePerDof)
merged[step] = true; merged[step] = true;
} }
...@@ -6196,15 +6184,15 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6196,15 +6184,15 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
} }
int numGaussian = 0, numUniform = 0; int numGaussian = 0, numUniform = 0;
for (int j = step; j < numSteps && (j == step || merged[j]); j++) { for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
numGaussian += numAtoms*usesVariable(expression[j], "gaussian"); numGaussian += numAtoms*usesVariable(expression[j][0], "gaussian");
numUniform += numAtoms*usesVariable(expression[j], "uniform"); numUniform += numAtoms*usesVariable(expression[j][0], "uniform");
compute << "{\n"; compute << "{\n";
if (numGaussian > 0) if (numGaussian > 0)
compute << "float4 gaussian = gaussianValues[gaussianIndex+index];\n"; compute << "float4 gaussian = gaussianValues[gaussianIndex+index];\n";
if (numUniform > 0) if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n"; compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j], i, integrator, forceName[j], energyName[j]); compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j]);
if (variable[j] == "x") { if (variable[j] == "x") {
if (storePosAsDelta[j]) { if (storePosAsDelta[j]) {
if (cl.getSupportsDoublePrecision()) if (cl.getSupportsDoublePrecision())
...@@ -6257,7 +6245,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6257,7 +6245,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
index += 4; index += 4;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++)
...@@ -6270,41 +6257,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6270,41 +6257,10 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernels[step].push_back(kernel); kernels[step].push_back(kernel);
index = 0; index = 0;
kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
bool found = false; kernel.setArg<cl::Buffer>(index++, summedValue->getDeviceBuffer());
for (int j = 0; j < integrator.getNumGlobalVariables() && !found; j++)
if (variable[step] == integrator.getGlobalVariableName(j)) {
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kernel.setArg<cl_uint>(index++, j);
found = true;
}
for (int j = 0; j < (int) parameterNames.size() && !found; j++)
if (variable[step] == parameterNames[j]) {
kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
kernel.setArg<cl_uint>(index++, j);
found = true;
modifiesParameters = true;
}
if (!found)
throw OpenMMException("Unknown global variable: "+variable[step]);
kernel.setArg<cl_int>(index++, 3*numAtoms); kernel.setArg<cl_int>(index++, 3*numAtoms);
} }
} }
else if (stepType[step] == CustomIntegrator::ComputeGlobal && !merged[step]) {
// Compute a global value.
stringstream compute;
for (int i = step; i < numSteps && (i == step || merged[i]); i++)
compute << "{\n" << createGlobalComputation(variable[i], expression[i], integrator, energyName[i]) << "}\n";
map<string, string> replacements;
replacements["COMPUTE_STEP"] = compute.str();
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customIntegratorGlobal, replacements), defines);
cl::Kernel kernel = cl::Kernel(program, "computeGlobal");
kernels[step].push_back(kernel);
int index = 0;
kernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
}
else if (stepType[step] == CustomIntegrator::ConstrainPositions) { else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
// Apply position constraints. // Apply position constraints.
...@@ -6377,7 +6333,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6377,7 +6333,6 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, cl.getForce().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, integration.getStepSize().getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, globalValues->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, contextParameterValues->getDeviceBuffer());
kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
index += 2; index += 2;
kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, uniformRandoms->getDeviceBuffer());
...@@ -6395,12 +6350,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6395,12 +6350,11 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
sumKineticEnergyKernel = cl::Kernel(program, useDouble ? "computeDoubleSum" : "computeFloatSum"); sumKineticEnergyKernel = cl::Kernel(program, useDouble ? "computeDoubleSum" : "computeFloatSum");
index = 0; index = 0;
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer()); sumKineticEnergyKernel.setArg<cl::Buffer>(index++, sumBuffer->getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl::Buffer>(index++, kineticEnergy->getDeviceBuffer()); sumKineticEnergyKernel.setArg<cl::Buffer>(index++, summedValue->getDeviceBuffer());
sumKineticEnergyKernel.setArg<cl_int>(index++, 0);
sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms); sumKineticEnergyKernel.setArg<cl_int>(index++, 3*numAtoms);
} }
// Make sure all values (variables, parameters, etc.) stored on the device are up to date. // Make sure all values (variables, parameters, etc.) are up to date.
if (!deviceValuesAreCurrent) { if (!deviceValuesAreCurrent) {
if (useDouble) if (useDouble)
...@@ -6412,38 +6366,14 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -6412,38 +6366,14 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
localValuesAreCurrent = false; localValuesAreCurrent = false;
double stepSize = integrator.getStepSize(); double stepSize = integrator.getStepSize();
if (stepSize != prevStepSize) { if (stepSize != prevStepSize) {
if (useDouble) { recordGlobalValue(stepSize, GlobalTarget(DT, dtVariableIndex));
mm_double2 ss = mm_double2(0, stepSize);
integration.getStepSize().upload(&ss);
} }
else {
mm_float2 ss = mm_float2(0, (float) stepSize);
integration.getStepSize().upload(&ss);
}
prevStepSize = stepSize;
}
bool paramsChanged = false;
if (useDouble) {
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
double value = context.getParameter(parameterNames[i]); double value = context.getParameter(parameterNames[i]);
if (value != contextValuesDouble[i]) { if (value != globalValuesDouble[parameterVariableIndex[i]]) {
contextValuesDouble[i] = value; globalValuesDouble[parameterVariableIndex[i]] = value;
paramsChanged = true; deviceGlobalsAreCurrent = false;
}
}
if (paramsChanged)
contextParameterValues->upload(contextValuesDouble);
}
else {
for (int i = 0; i < (int) parameterNames.size(); i++) {
float value = (float) context.getParameter(parameterNames[i]);
if (value != contextValuesFloat[i]) {
contextValuesFloat[i] = value;
paramsChanged = true;
}
} }
if (paramsChanged)
contextParameterValues->upload(contextValuesFloat);
} }
} }
...@@ -6455,9 +6385,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -6455,9 +6385,10 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
// Loop over computation steps in the integrator and execute them. // Loop over computation steps in the integrator and execute them.
for (int i = 0; i < numSteps; i++) { for (int step = 0; step < numSteps; ) {
int nextStep = step+1;
int lastForceGroups = context.getLastForceGroups(); int lastForceGroups = context.getLastForceGroups();
if ((needsForces[i] || needsEnergy[i]) && (!forcesAreValid || lastForceGroups != forceGroup[i])) { if ((needsForces[step] || needsEnergy[step]) && (!forcesAreValid || lastForceGroups != forceGroupFlags[step])) {
if (forcesAreValid && savedForces.find(lastForceGroups) != savedForces.end()) { if (forcesAreValid && savedForces.find(lastForceGroups) != savedForces.end()) {
// The forces are still valid. We just need a different force group right now. Save the old // The forces are still valid. We just need a different force group right now. Save the old
// forces in case we need them again. // forces in case we need them again.
...@@ -6471,79 +6402,99 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -6471,79 +6402,99 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
// Recompute forces and/or energy. Figure out what is actually needed // Recompute forces and/or energy. Figure out what is actually needed
// between now and the next time they get invalidated again. // between now and the next time they get invalidated again.
bool computeForce = false, computeEnergy = false; bool computeForce = (needsForces[step] || computeBothForceAndEnergy[step]);
for (int j = i; ; j++) { bool computeEnergy = (needsEnergy[step] || computeBothForceAndEnergy[step]);
if (needsForces[j]) if (!computeEnergy && validSavedForces.find(forceGroupFlags[step]) != validSavedForces.end()) {
computeForce = true;
if (needsEnergy[j])
computeEnergy = true;
if (invalidatesForces[j])
break;
if (j == numSteps-1)
j = -1;
if (j == i-1)
break;
}
if (!computeEnergy && validSavedForces.find(forceGroup[i]) != validSavedForces.end()) {
// We can just restore the forces we saved earlier. // We can just restore the forces we saved earlier.
savedForces[forceGroup[i]]->copyTo(cl.getForce()); savedForces[forceGroupFlags[step]]->copyTo(cl.getForce());
} }
else { else {
recordChangedParameters(context); recordChangedParameters(context);
energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroup[i]); energy = context.calcForcesAndEnergy(computeForce, computeEnergy, forceGroupFlags[step]);
forcesAreValid = true; forcesAreValid = true;
} }
} }
if (stepType[i] == CustomIntegrator::ComputePerDof && !merged[i]) { if (needsGlobals[step] && !deviceGlobalsAreCurrent) {
kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i])); // Upload the global values to the device.
kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer());
kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer()); if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision())
if (cl.getUseDoublePrecision()) globalValues->upload(globalValuesDouble);
kernels[i][0].setArg<cl_double>(12, energy); else {
else for (int j = 0; j < (int) globalValuesDouble.size(); j++)
kernels[i][0].setArg<cl_float>(12, (cl_float) energy); globalValuesFloat[j] = (float) globalValuesDouble[j];
if (requiredUniform[i] > 0) globalValues->upload(globalValuesFloat);
cl.executeKernel(randomKernel, numAtoms);
cl.executeKernel(kernels[i][0], numAtoms);
} }
else if (stepType[i] == CustomIntegrator::ComputeGlobal && !merged[i]) { }
kernels[i][0].setArg<cl_float>(3, (cl_float) SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber()); if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) {
kernels[i][0].setArg<cl_float>(4, (cl_float) SimTKOpenMMUtilities::getNormallyDistributedRandomNumber()); kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision()) if (cl.getUseDoublePrecision())
kernels[i][0].setArg<cl_double>(5, energy); kernels[step][0].setArg<cl_double>(11, energy);
else else
kernels[i][0].setArg<cl_float>(5, (cl_float) energy); kernels[step][0].setArg<cl_float>(11, (cl_float) energy);
cl.executeKernel(kernels[i][0], 1, 1); if (requiredUniform[step] > 0)
} cl.executeKernel(randomKernel, numAtoms);
else if (stepType[i] == CustomIntegrator::ComputeSum) { cl.executeKernel(kernels[step][0], numAtoms);
kernels[i][0].setArg<cl_uint>(10, integration.prepareRandomNumbers(requiredGaussian[i])); }
kernels[i][0].setArg<cl::Buffer>(9, integration.getRandom().getDeviceBuffer()); else if (stepType[step] == CustomIntegrator::ComputeGlobal) {
kernels[i][0].setArg<cl::Buffer>(11, uniformRandoms->getDeviceBuffer()); expressionSet.setVariable(uniformVariableIndex, SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber());
expressionSet.setVariable(gaussianVariableIndex, SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
expressionSet.setVariable(stepEnergyVariableIndex[step], energy);
recordGlobalValue(globalExpressions[step][0].evaluate(), stepTarget[step]);
}
else if (stepType[step] == CustomIntegrator::ComputeSum) {
kernels[step][0].setArg<cl_uint>(9, integration.prepareRandomNumbers(requiredGaussian[step]));
kernels[step][0].setArg<cl::Buffer>(8, integration.getRandom().getDeviceBuffer());
kernels[step][0].setArg<cl::Buffer>(10, uniformRandoms->getDeviceBuffer());
if (cl.getUseDoublePrecision()) if (cl.getUseDoublePrecision())
kernels[i][0].setArg<cl_double>(12, energy); kernels[step][0].setArg<cl_double>(11, energy);
else else
kernels[i][0].setArg<cl_float>(12, (cl_float) energy); kernels[step][0].setArg<cl_float>(11, (cl_float) energy);
if (requiredUniform[i] > 0) if (requiredUniform[step] > 0)
cl.executeKernel(randomKernel, numAtoms); cl.executeKernel(randomKernel, numAtoms);
cl.clearBuffer(*sumBuffer); cl.clearBuffer(*sumBuffer);
cl.executeKernel(kernels[i][0], numAtoms); cl.executeKernel(kernels[step][0], numAtoms);
cl.executeKernel(kernels[i][1], OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize); cl.executeKernel(kernels[step][1], OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double value;
summedValue->download(&value);
globalValuesDouble[stepTarget[step].variableIndex] = value;
}
else {
float value;
summedValue->download(&value);
globalValuesDouble[stepTarget[step].variableIndex] = value;
} }
else if (stepType[i] == CustomIntegrator::UpdateContextState) { }
else if (stepType[step] == CustomIntegrator::UpdateContextState) {
recordChangedParameters(context); recordChangedParameters(context);
context.updateContextState(); context.updateContextState();
} }
else if (stepType[i] == CustomIntegrator::ConstrainPositions) { else if (stepType[step] == CustomIntegrator::ConstrainPositions) {
cl.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance()); cl.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
cl.executeKernel(kernels[i][0], numAtoms); cl.executeKernel(kernels[step][0], numAtoms);
cl.getIntegrationUtilities().computeVirtualSites(); cl.getIntegrationUtilities().computeVirtualSites();
} }
else if (stepType[i] == CustomIntegrator::ConstrainVelocities) { else if (stepType[step] == CustomIntegrator::ConstrainVelocities) {
cl.getIntegrationUtilities().applyVelocityConstraints(integrator.getConstraintTolerance()); cl.getIntegrationUtilities().applyVelocityConstraints(integrator.getConstraintTolerance());
} }
if (invalidatesForces[i]) else if (stepType[step] == CustomIntegrator::BeginIfBlock) {
if (!evaluateCondition(step))
nextStep = blockEnd[step]+1;
}
else if (stepType[step] == CustomIntegrator::BeginWhileBlock) {
if (!evaluateCondition(step))
nextStep = blockEnd[step]+1;
}
else if (stepType[step] == CustomIntegrator::EndBlock) {
if (blockEnd[step] != -1)
nextStep = blockEnd[step]; // Return to the start of a while block.
}
if (invalidatesForces[step])
forcesAreValid = false; forcesAreValid = false;
step = nextStep;
} }
recordChangedParameters(context); recordChangedParameters(context);
...@@ -6564,6 +6515,29 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -6564,6 +6515,29 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
#endif #endif
} }
bool OpenCLIntegrateCustomStepKernel::evaluateCondition(int step) {
expressionSet.setVariable(uniformVariableIndex, SimTKOpenMMUtilities::getUniformlyDistributedRandomNumber());
expressionSet.setVariable(gaussianVariableIndex, SimTKOpenMMUtilities::getNormallyDistributedRandomNumber());
expressionSet.setVariable(stepEnergyVariableIndex[step], energy);
double lhs = globalExpressions[step][0].evaluate();
double rhs = globalExpressions[step][1].evaluate();
switch (comparisons[step]) {
case CustomIntegratorUtilities::EQUAL:
return (lhs == rhs);
case CustomIntegratorUtilities::LESS_THAN:
return (lhs < rhs);
case CustomIntegratorUtilities::GREATER_THAN:
return (lhs > rhs);
case CustomIntegratorUtilities::NOT_EQUAL:
return (lhs != rhs);
case CustomIntegratorUtilities::LESS_THAN_OR_EQUAL:
return (lhs <= rhs);
case CustomIntegratorUtilities::GREATER_THAN_OR_EQUAL:
return (lhs >= rhs);
}
throw OpenMMException("Invalid comparison operator");
}
double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) { double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
prepareForComputation(context, integrator, forcesAreValid); prepareForComputation(context, integrator, forcesAreValid);
if (keNeedsForce && !forcesAreValid) { if (keNeedsForce && !forcesAreValid) {
...@@ -6577,69 +6551,81 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex ...@@ -6577,69 +6551,81 @@ double OpenCLIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& contex
forcesAreValid = true; forcesAreValid = true;
} }
cl.clearBuffer(*sumBuffer); cl.clearBuffer(*sumBuffer);
kineticEnergyKernel.setArg<cl::Buffer>(9, cl.getIntegrationUtilities().getRandom().getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(8, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
kineticEnergyKernel.setArg<cl_uint>(10, 0); kineticEnergyKernel.setArg<cl_uint>(9, 0);
cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms()); cl.executeKernel(kineticEnergyKernel, cl.getNumAtoms());
cl.executeKernel(sumKineticEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize); cl.executeKernel(sumKineticEnergyKernel, OpenCLContext::ThreadBlockSize, OpenCLContext::ThreadBlockSize);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double ke; double ke;
kineticEnergy->download(&ke); summedValue->download(&ke);
return ke; return ke;
} }
else { else {
float ke; float ke;
kineticEnergy->download(&ke); summedValue->download(&ke);
return ke; return ke;
} }
} }
void OpenCLIntegrateCustomStepKernel::recordGlobalValue(double value, GlobalTarget target) {
switch (target.type) {
case DT:
globalValuesDouble[dtVariableIndex] = value;
deviceGlobalsAreCurrent = false;
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double size[] = {0, value};
cl.getIntegrationUtilities().getStepSize().upload(size);
}
else {
float size[] = {0, (float) value};
cl.getIntegrationUtilities().getStepSize().upload(size);
}
prevStepSize = value;
break;
case VARIABLE:
case PARAMETER:
expressionSet.setVariable(target.variableIndex, value);
globalValuesDouble[target.variableIndex] = value;
deviceGlobalsAreCurrent = false;
break;
}
}
void OpenCLIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) { void OpenCLIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) {
if (!modifiesParameters) if (!modifiesParameters)
return; return;
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
contextParameterValues->download(contextValuesDouble);
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
double value = context.getParameter(parameterNames[i]); double value = context.getParameter(parameterNames[i]);
if (value != contextValuesDouble[i]) if (value != globalValuesDouble[parameterVariableIndex[i]])
context.setParameter(parameterNames[i], contextValuesDouble[i]); context.setParameter(parameterNames[i], globalValuesDouble[parameterVariableIndex[i]]);
}
}
else {
contextParameterValues->download(contextValuesFloat);
for (int i = 0; i < (int) parameterNames.size(); i++) {
float value = (float) context.getParameter(parameterNames[i]);
if (value != contextValuesFloat[i])
context.setParameter(parameterNames[i], contextValuesFloat[i]);
}
} }
} }
void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const { void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
if (numGlobalVariables == 0) { if (globalValues == NULL) {
values.resize(0); // The data structures haven't been created yet, so just return the list of values that was given earlier.
return;
values = initialGlobalVariables;
} }
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) values.resize(numGlobalVariables);
globalValues->download(values);
else {
vector<cl_float> buffer;
globalValues->download(buffer);
for (int i = 0; i < numGlobalVariables; i++) for (int i = 0; i < numGlobalVariables; i++)
values[i] = buffer[i]; values[i] = globalValuesDouble[globalVariableIndex[i]];
}
} }
void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) { void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
if (numGlobalVariables == 0) if (numGlobalVariables == 0)
return; return;
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) if (globalValues == NULL) {
globalValues->upload(values); // The data structures haven't been created yet, so just store the list of values.
else {
vector<cl_float> buffer(numGlobalVariables); initialGlobalVariables = values;
for (int i = 0; i < numGlobalVariables; i++) return;
buffer[i] = (cl_float) values[i]; }
globalValues->upload(buffer); for (int i = 0; i < numGlobalVariables; i++) {
globalValuesDouble[globalVariableIndex[i]] = values[i];
expressionSet.setVariable(globalVariableIndex[i], values[i]);
} }
deviceGlobalsAreCurrent = false;
} }
void OpenCLIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const { void OpenCLIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const {
......
__kernel void computeFloatSum(__global const float* restrict sumBuffer, __global float* result, unsigned int outputIndex, int bufferSize) { __kernel void computeFloatSum(__global const float* restrict sumBuffer, __global float* result, int bufferSize) {
__local float tempBuffer[WORK_GROUP_SIZE]; __local float tempBuffer[WORK_GROUP_SIZE];
const unsigned int thread = get_local_id(0); const unsigned int thread = get_local_id(0);
float sum = 0; float sum = 0;
...@@ -11,11 +11,11 @@ __kernel void computeFloatSum(__global const float* restrict sumBuffer, __global ...@@ -11,11 +11,11 @@ __kernel void computeFloatSum(__global const float* restrict sumBuffer, __global
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
result[outputIndex] = tempBuffer[0]; *result = tempBuffer[0];
} }
#ifdef SUPPORTS_DOUBLE_PRECISION #ifdef SUPPORTS_DOUBLE_PRECISION
__kernel void computeDoubleSum(__global const double* restrict sumBuffer, __global double* result, unsigned int outputIndex, int bufferSize) { __kernel void computeDoubleSum(__global const double* restrict sumBuffer, __global double* result, int bufferSize) {
__local double tempBuffer[WORK_GROUP_SIZE]; __local double tempBuffer[WORK_GROUP_SIZE];
const unsigned int thread = get_local_id(0); const unsigned int thread = get_local_id(0);
double sum = 0; double sum = 0;
...@@ -28,7 +28,7 @@ __kernel void computeDoubleSum(__global const double* restrict sumBuffer, __glob ...@@ -28,7 +28,7 @@ __kernel void computeDoubleSum(__global const double* restrict sumBuffer, __glob
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
result[outputIndex] = tempBuffer[0]; *result = tempBuffer[0];
} }
#endif #endif
......
__kernel void computeGlobal(__global mixed2* restrict dt, __global mixed* restrict globals, __global mixed* restrict params,
float uniform, float gaussian, const real energy) {
COMPUTE_STEP
}
...@@ -25,8 +25,7 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec ...@@ -25,8 +25,7 @@ void storePos(__global real4* restrict posq, __global real4* restrict posqCorrec
__kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta, __kernel void computePerDof(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta,
__global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals, __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt, __global const mixed* restrict globals,
__global const mixed* restrict params, __global mixed* restrict sum, __global const float4* restrict gaussianValues, __global mixed* restrict sum, __global const float4* restrict gaussianValues, unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, const real energy
unsigned int gaussianBaseIndex, __global const float4* restrict uniformValues, const real energy
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
mixed stepSize = dt[0].y; mixed stepSize = dt[0].y;
int index = get_global_id(0); int index = get_global_id(0);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -756,6 +756,66 @@ void testMergedRandoms() { ...@@ -756,6 +756,66 @@ void testMergedRandoms() {
} }
} }
void testIfBlock() {
System system;
system.addParticle(2.0);
system.addParticle(2.0);
const double dt = 0.01;
CustomIntegrator integrator(dt);
integrator.addGlobalVariable("a", 0);
integrator.addGlobalVariable("b", 0);
integrator.addComputeGlobal("b", "1");
integrator.beginIfBlock("a < 3.5");
integrator.addComputeGlobal("b", "a+1");
integrator.endBlock();
Context context(system, integrator, platform);
// Set "a" to 1.7 and verify that "b" gets set to a+1.
integrator.setGlobalVariable(0, 1.7);
integrator.step(1);
ASSERT_EQUAL_TOL(2.7, integrator.getGlobalVariable(1), 1e-6);
// Now set it to a value that should cause the block to be skipped.
integrator.setGlobalVariable(0, 4.1);
integrator.step(1);
ASSERT_EQUAL_TOL(1.0, integrator.getGlobalVariable(1), 1e-6);
}
void testWhileBlock() {
System system;
system.addParticle(2.0);
system.addParticle(2.0);
const double dt = 0.01;
CustomIntegrator integrator(dt);
integrator.addGlobalVariable("a", 0);
integrator.addGlobalVariable("b", 0);
integrator.addComputeGlobal("b", "1");
integrator.beginWhileBlock("b <= a");
integrator.addComputeGlobal("b", "b+1");
integrator.endBlock();
Context context(system, integrator, platform);
// Try a case where the loop should be skipped.
integrator.setGlobalVariable(0, -3.3);
integrator.step(1);
ASSERT_EQUAL_TOL(1.0, integrator.getGlobalVariable(1), 1e-6);
// In this case it should be executed exactly once.
integrator.setGlobalVariable(0, 1.2);
integrator.step(1);
ASSERT_EQUAL_TOL(2.0, integrator.getGlobalVariable(1), 1e-6);
// In this case, it should be executed several times.
integrator.setGlobalVariable(0, 5.3);
integrator.step(1);
ASSERT_EQUAL_TOL(6.0, integrator.getGlobalVariable(1), 1e-6);
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (argc > 1) if (argc > 1)
...@@ -773,6 +833,8 @@ int main(int argc, char* argv[]) { ...@@ -773,6 +833,8 @@ int main(int argc, char* argv[]) {
testForceGroups(); testForceGroups();
testRespa(); testRespa();
testMergedRandoms(); testMergedRandoms();
testIfBlock();
testWhileBlock();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment