Commit 51fc3bd2 authored by Peter Eastman's avatar Peter Eastman
Browse files

Beginnings of OpenCL implementation of CustomGBForce

parent 7ded64ba
......@@ -318,3 +318,30 @@ void OpenCLExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node
for (int i = 0; i < (int) searchNode.getChildren().size(); i++)
findRelatedPowers(node, searchNode.getChildren()[i], powers);
}
/**
 * Turn a table of function values into cubic polynomial coefficients, one
 * set of four per interval between consecutive table entries.
 *
 * The four numbers stored for each interval are the polynomial coefficients
 * in order of increasing power.  When interpolating is true the weights are
 * those of a Catmull-Rom spline (the curve passes through the tabulated
 * values); otherwise they are those of a uniform cubic B-spline (the curve
 * only approximates the values).
 *
 * The table is expected to hold at least two values; the first two and last
 * two entries are read unconditionally.
 */
vector<mm_float4> OpenCLExpressionUtilities::computeFunctionCoefficients(const vector<double>& values, bool interpolating) {
    const int numValues = (int) values.size();

    // Extend the table by one point on each side, extrapolated linearly from
    // the two nearest values, so every interval has a neighbour on both sides.

    vector<double> padded;
    padded.reserve(numValues+2);
    padded.push_back(2*values[0]-values[1]);
    padded.insert(padded.end(), values.begin(), values.end());
    padded.push_back(2*values[numValues-1]-values[numValues-2]);

    // Build the coefficients for each interval from the four surrounding points.

    vector<mm_float4> coefficients(numValues-1);
    for (int interval = 0; interval < numValues-1; interval++) {
        const double p0 = padded[interval];
        const double p1 = padded[interval+1];
        const double p2 = padded[interval+2];
        const double p3 = padded[interval+3];
        if (interpolating) {
            coefficients[interval] = (mm_float4) {(cl_float) p1,
                                                  (cl_float) (0.5*(-p0+p2)),
                                                  (cl_float) (0.5*(2.0*p0-5.0*p1+4.0*p2-p3)),
                                                  (cl_float) (0.5*(-p0+3.0*p1-3.0*p2+p3))};
        }
        else {
            coefficients[interval] = (mm_float4) {(cl_float) ((p0+4.0*p1+p2)/6.0),
                                                  (cl_float) ((-3.0*p0+3.0*p2)/6.0),
                                                  (cl_float) ((3.0*p0-6.0*p1+3.0*p2)/6.0),
                                                  (cl_float) ((-p0+3.0*p1-3.0*p2+p3)/6.0)};
        }
    }
    return coefficients;
}
......@@ -27,6 +27,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLContext.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/ParsedExpression.h"
#include <map>
......@@ -54,6 +56,15 @@ public:
*/
static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams);
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
*
* @param values the tabulated values of the function
* @param interpolating true if an interpolating spline should be used, false if an approximating spline should be used
* @return the spline coefficients
*/
static std::vector<mm_float4> computeFunctionCoefficients(const std::vector<double>& values, bool interpolating);
class FunctionPlaceholder;
private:
static void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps, const std::map<std::string, std::string>& variables,
......@@ -66,6 +77,26 @@ private:
std::map<int, const Lepton::ExpressionTreeNode*>& powers);
};
/**
 * A do-nothing Lepton::CustomFunction that can be registered under the name
 * of a custom function so that expressions calling that function can be parsed.
 * It declares a single argument, and both its value and its derivative are
 * always reported as zero.
 */
class OpenCLExpressionUtilities::FunctionPlaceholder : public Lepton::CustomFunction {
public:
    /** The placeholder always takes exactly one argument. */
    int getNumArguments() const {
        return 1;
    }
    /** The arguments are ignored; the result is always zero. */
    double evaluate(const double*) const {
        return 0.0;
    }
    /** The arguments and derivative orders are ignored; the result is always zero. */
    double evaluateDerivative(const double*, const int*) const {
        return 0.0;
    }
    /** Create a new heap-allocated placeholder; the caller takes ownership of it. */
    CustomFunction* clone() const {
        return new FunctionPlaceholder();
    }
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLEXPRESSIONUTILITIES_H_*/
......@@ -54,6 +54,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
if (name == CalcGBSAOBCForceKernel::Name())
return new OpenCLCalcGBSAOBCForceKernel(name, platform, cl);
if (name == CalcCustomGBForceKernel::Name())
return new OpenCLCalcCustomGBForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomExternalForceKernel::Name())
return new OpenCLCalcCustomExternalForceKernel(name, platform, cl, context.getSystem());
if (name == IntegrateVerletStepKernel::Name())
......
......@@ -33,7 +33,6 @@
#include "OpenCLExpressionUtilities.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "lepton/CustomFunction.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include <cmath>
......@@ -208,6 +207,8 @@ OpenCLCalcHarmonicBondForceKernel::~OpenCLCalcHarmonicBondForceKernel() {
void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
params = new OpenCLArray<mm_float2>(cl, numBonds, "bondParams");
indices = new OpenCLArray<mm_int4>(cl, numBonds, "bondIndices");
vector<int> forceBufferCounter(system.getNumParticles(), 0);
......@@ -231,6 +232,8 @@ void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const H
}
void OpenCLCalcHarmonicBondForceKernel::executeForces(ContextImpl& context) {
if (numBonds == 0)
return;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg<cl_int>(0, cl.getPaddedNumAtoms());
......@@ -289,6 +292,8 @@ OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customBondParams");
indices = new OpenCLArray<mm_int4>(cl, numBonds, "customBondIndices");
string extraArguments;
......@@ -363,6 +368,8 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
}
void OpenCLCalcCustomBondForceKernel::executeForces(ContextImpl& context) {
if (numBonds == 0)
return;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < globalParamNames.size(); i++) {
......@@ -434,6 +441,8 @@ OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
numAngles = force.getNumAngles();
if (numAngles == 0)
return;
params = new OpenCLArray<mm_float2>(cl, numAngles, "angleParams");
indices = new OpenCLArray<mm_int8>(cl, numAngles, "angleIndices");
vector<int> forceBufferCounter(system.getNumParticles(), 0);
......@@ -459,6 +468,8 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
}
void OpenCLCalcHarmonicAngleForceKernel::executeForces(ContextImpl& context) {
if (numAngles == 0)
return;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg<cl_int>(0, cl.getPaddedNumAtoms());
......@@ -514,6 +525,8 @@ OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
numTorsions = force.getNumTorsions();
if (numTorsions == 0)
return;
params = new OpenCLArray<mm_float4>(cl, numTorsions, "periodicTorsionParams");
indices = new OpenCLArray<mm_int8>(cl, numTorsions, "periodicTorsionIndices");
vector<int> forceBufferCounter(system.getNumParticles(), 0);
......@@ -539,6 +552,8 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
}
void OpenCLCalcPeriodicTorsionForceKernel::executeForces(ContextImpl& context) {
if (numTorsions == 0)
return;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg<cl_int>(0, cl.getPaddedNumAtoms());
......@@ -594,6 +609,8 @@ OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
numTorsions = force.getNumTorsions();
if (numTorsions == 0)
return;
params = new OpenCLArray<mm_float8>(cl, numTorsions, "rbTorsionParams");
indices = new OpenCLArray<mm_int8>(cl, numTorsions, "rbTorsionIndices");
vector<int> forceBufferCounter(system.getNumParticles(), 0);
......@@ -619,6 +636,8 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
}
void OpenCLCalcRBTorsionForceKernel::executeForces(ContextImpl& context) {
if (numTorsions == 0)
return;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg<cl_int>(0, cl.getPaddedNumAtoms());
......@@ -896,12 +915,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
string extraArguments;
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0) {
if (force.getNumGlobalParameters() > 0)
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", false, CL_MEM_READ_ONLY);
extraArguments += ", __constant float* globals";
}
vector<vector<cl_float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
......@@ -920,27 +936,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
}
params->setParameterValues(paramVector);
// This class serves as a placeholder for custom functions in expressions.
class FunctionPlaceholder : public Lepton::CustomFunction {
public:
int getNumArguments() const {
return 1;
}
double evaluate(const double* arguments) const {
return 0.0;
}
double evaluateDerivative(const double* arguments, const int* derivOrder) const {
return 0.0;
}
CustomFunction* clone() const {
return new FunctionPlaceholder();
}
};
// Record the tabulated functions.
FunctionPlaceholder* fp = new FunctionPlaceholder();
OpenCLExpressionUtilities::FunctionPlaceholder fp;
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions());
......@@ -952,42 +950,17 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
force.getFunctionParameters(i, name, values, min, max, interpolating);
string arrayName = prefix+"table"+intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = fp;
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = (mm_float4) {(float) min, (float) max, (float) ((values.size()-1)/(max-min)), 0.0f};
// First create a padded set of function values.
vector<double> padded(values.size()+2);
padded[0] = 2*values[0]-values[1];
for (int i = 0; i < (int) values.size(); i++)
padded[i+1] = values[i];
padded[padded.size()-1] = 2*values[values.size()-1]-values[values.size()-2];
// Now compute the spline coefficients.
vector<mm_float4> f(values.size()-1);
for (int i = 0; i < (int) values.size()-1; i++) {
if (interpolating)
f[i] = (mm_float4) {(cl_float) padded[i+1],
(cl_float) (0.5*(-padded[i]+padded[i+2])),
(cl_float) (0.5*(2.0*padded[i]-5.0*padded[i+1]+4.0*padded[i+2]-padded[i+3])),
(cl_float) (0.5*(-padded[i]+3.0*padded[i+1]-3.0*padded[i+2]+padded[i+3]))};
else
f[i] = (mm_float4) {(cl_float) ((padded[i]+4.0*padded[i+1]+padded[i+2])/6.0),
(cl_float) ((-3.0*padded[i]+3.0*padded[i+2])/6.0),
(cl_float) ((3.0*padded[i]-6.0*padded[i+1]+3.0*padded[i+2])/6.0),
(cl_float) ((-padded[i]+3.0*padded[i+1]-3.0*padded[i+2]+padded[i+3])/6.0)};
}
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, interpolating);
tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float4", sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
extraArguments += ", __constant float4* "+arrayName;
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float4", sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer()));
extraArguments += ", __constant float4* "+prefix+"functionParams";
}
// Record information for the expressions.
......@@ -1040,7 +1013,6 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", sizeof(cl_float), globals->getDeviceBuffer()));
}
cl.addForce(new OpenCLCustomNonbondedForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
delete fp;
}
void OpenCLCalcCustomNonbondedForceKernel::executeForces(ContextImpl& context) {
......@@ -1202,7 +1174,7 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
cl.clearBuffer(*bornForce);
cl.executeKernel(computeBornSumKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(reduceBornSumKernel, cl.getPaddedNumAtoms());
cl.executeKernel(force1Kernel, cl.getPaddedNumAtoms());
cl.executeKernel(force1Kernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(reduceBornForceKernel, cl.getPaddedNumAtoms());
}
......@@ -1210,6 +1182,319 @@ double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLCustomGBForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomGBForceInfo(int requiredBuffers, const CustomGBForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector<double> params1;
vector<double> params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, std::vector<int>& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomGBForce& force;
};
/**
 * Release every device array and parameter set owned by this kernel.
 */
OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
    if (params != NULL)
        delete params;
    if (computedValues != NULL)
        delete computedValues;
    if (globals != NULL)
        delete globals;
    // valueBuffers is created on the first call to executeForces(), so it is
    // still NULL if the kernel was never executed.
    if (valueBuffers != NULL)
        delete valueBuffers;
    if (tabulatedFunctionParams != NULL)
        delete tabulatedFunctionParams;
    for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
        delete tabulatedFunctions[i];
}
/**
 * Set up everything needed to evaluate a CustomGBForce: validate the computed
 * values, upload per-particle and global parameters and tabulated functions,
 * and build the two OpenCL kernels that compute the pairwise value and reduce it.
 *
 * @param system the System this kernel will be applied to
 * @param force  the CustomGBForce this kernel will be used for
 * @throws OpenMMException if the first computed value is of type SingleParticle,
 *         or if any later computed value is not of type SingleParticle
 */
void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
    // Only one pairwise computed value is supported, and it must come first.

    bool useExclusionsForValue = false;
    string n2ValueExpression;
    if (force.getNumComputedValues() > 0) {
        string name, expression;
        CustomGBForce::ComputationType type;
        force.getComputedValueParameters(0, name, expression, type);
        if (type == CustomGBForce::SingleParticle)
            throw OpenMMException("OpenCLPlatform requires that the first computed value for a CustomGBForce be of type ParticlePair or ParticlePairNoExclusions.");
        useExclusionsForValue = (type == CustomGBForce::ParticlePair);
        n2ValueExpression = expression;
        for (int i = 1; i < force.getNumComputedValues(); i++) {
            force.getComputedValueParameters(i, name, expression, type);
            if (type != CustomGBForce::SingleParticle)
                throw OpenMMException("OpenCLPlatform requires that a CustomGBForce only have one computed value of type ParticlePair or ParticlePairNoExclusions.");
        }
    }

    // Derive a name prefix from the position of this force within the System,
    // so that identifiers generated for different forces do not clash.

    int forceIndex;
    for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
        ;
    string prefix = "custom"+intToString(forceIndex)+"_";

    // Record parameters and exclusions.

    int numParticles = force.getNumParticles();
    params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customGBParameters");
    computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), numParticles, "customGBComputedValues");
    if (force.getNumGlobalParameters() > 0)
        globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", false, CL_MEM_READ_ONLY);
    vector<vector<cl_float> > paramVector(numParticles);
    vector<vector<int> > exclusionList(numParticles);
    for (int i = 0; i < numParticles; i++) {
        vector<double> parameters;
        force.getParticleParameters(i, parameters);
        paramVector[i].resize(parameters.size());
        for (int j = 0; j < (int) parameters.size(); j++)
            paramVector[i][j] = (cl_float) parameters[j];
        // Every particle is listed as excluded from itself.
        exclusionList[i].push_back(i);
    }
    for (int i = 0; i < force.getNumExclusions(); i++) {
        int particle1, particle2;
        force.getExclusionParticles(i, particle1, particle2);
        exclusionList[particle1].push_back(particle2);
        exclusionList[particle2].push_back(particle1);
    }
    params->setParameterValues(paramVector);

    // Record the tabulated functions.  A single placeholder object is registered
    // under every function name so that the parser accepts calls to them.
    // NOTE(review): the tables are only registered with the nonbonded utilities
    // here; they are not added to the argument lists of the kernels built
    // below - confirm whether that is still to be done.

    OpenCLExpressionUtilities::FunctionPlaceholder fp;
    map<string, Lepton::CustomFunction*> functions;
    vector<pair<string, string> > functionDefinitions;
    vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions());
    for (int i = 0; i < force.getNumFunctions(); i++) {
        string name;
        vector<double> values;
        double min, max;
        bool interpolating;
        force.getFunctionParameters(i, name, values, min, max, interpolating);
        string arrayName = prefix+"table"+intToString(i);
        functionDefinitions.push_back(make_pair(name, arrayName));
        functions[name] = &fp;
        tabulatedFunctionParamsVec[i] = (mm_float4) {(float) min, (float) max, (float) ((values.size()-1)/(max-min)), 0.0f};
        vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, interpolating);
        tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
        tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
        cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float4", sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
    }
    if (force.getNumFunctions() > 0) {
        tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
        tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
        cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float4", sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer()));
    }

    // Record information for the expressions.

    globalParamNames.resize(force.getNumGlobalParameters());
    globalParamValues.resize(force.getNumGlobalParameters());
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
    }
    if (globals != NULL)
        globals->upload(globalParamValues);
    bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
    bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic);
//    Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
//    Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
//    map<string, Lepton::ParsedExpression> forceExpressions;
//    forceExpressions["tempEnergy += "] = energyExpression;
//    forceExpressions["tempForce -= "] = forceExpression;
//

    // Create the kernels.  Two variable maps are needed because the pairwise
    // expression is evaluated once for each ordering of the two particles:
    // variables2 is variables1 with the roles of particle 1 and 2 swapped.

    map<string, string> variables1;
    map<string, string> variables2;
    variables1["r"] = "r";
    variables2["r"] = "r";
    for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
        const string& name = force.getPerParticleParameterName(i);
        variables1[name+"1"] = prefix+"params"+params->getParameterSuffix(i, "1");
        variables1[name+"2"] = prefix+"params"+params->getParameterSuffix(i, "2");
        variables2[name+"2"] = prefix+"params"+params->getParameterSuffix(i, "1");
        variables2[name+"1"] = prefix+"params"+params->getParameterSuffix(i, "2");
    }
    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
        const string& name = force.getGlobalParameterName(i);
        string value = "globals["+intToString(i)+"]";
        variables1[name] = prefix+value;
        variables2[name] = prefix+value;
    }
    {
        // Create the N2 value kernel.

        map<string, Lepton::ParsedExpression> n2ValueExpressions;
        stringstream n2ValueSource;
        n2ValueExpressions["tempValue1 = "] = Lepton::Parser::parse(n2ValueExpression, functions).optimize();
        n2ValueSource << OpenCLExpressionUtilities::createExpressions(n2ValueExpressions, variables1, functionDefinitions, prefix+"tempA", prefix+"functionParams");
        n2ValueExpressions.clear();
        n2ValueExpressions["tempValue2 = "] = Lepton::Parser::parse(n2ValueExpression, functions).optimize();
        n2ValueSource << OpenCLExpressionUtilities::createExpressions(n2ValueExpressions, variables2, functionDefinitions, prefix+"tempB", prefix+"functionParams");
        map<string, string> replacements;
        replacements["COMPUTE_VALUE"] = n2ValueSource.str();
        // NOTE(review): the interaction is registered with an empty kernel source
        // string; the original author flagged this line, presumably as unfinished.
        cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, ""); // **********
        stringstream extraArgs, loadLocal1, loadLocal2, load1, load2;
        // The argument name must match the "<prefix>globals[i]" identifiers that
        // the variable maps above substitute into the generated expressions.
        if (force.getNumGlobalParameters() > 0)
            extraArgs << ", __constant float* " << prefix << "globals";
        for (int i = 0; i < (int) params->getBuffers().size(); i++) {
            const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
            string paramName = prefix+"params"+intToString(i+1);
            cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer()));
            extraArgs << ", __global " << buffer.getType() << "* global_" << paramName << ", __local " << buffer.getType() << "* local_" << paramName;
            loadLocal1 << "local_" << paramName << "[get_local_id(0)] = " << paramName << "1;\n";
            loadLocal2 << "local_" << paramName << "[get_local_id(0)] = global_" << paramName << "[j];\n";
            load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
            load2 << buffer.getType() << " " << paramName << "2 = local_" << paramName << "[atom2];\n";
        }
        replacements["PARAMETER_ARGUMENTS"] = extraArgs.str();
        replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
        replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
        replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
        replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
        map<string, string> defines;
        if (cl.getNonbondedUtilities().getForceBufferPerAtomBlock())
            defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
        if (useCutoff)
            defines["USE_CUTOFF"] = "1";
        if (usePeriodic)
            defines["USE_PERIODIC"] = "1";
        if (useExclusionsForValue)
            defines["USE_EXCLUSIONS"] = "1";
        Vec3 boxVectors[3];
        system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
        defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]);
        defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]);
        defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]);
        defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
        defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
        defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
        string filename = (cl.getSIMDWidth() == 32 ? "customGBValueN2_nvidia.cl" : "customGBValueN2_default.cl");
//        printf("%s\n", cl.loadSourceFromFile(filename, replacements).c_str());
        cl::Program program = cl.createProgram(cl.loadSourceFromFile(filename, replacements), defines);
        pairValueKernel = cl::Kernel(program, "computeN2Value");
    }
    {
        // Create the kernel to reduce the N2 value and calculate other values.

        stringstream reductionSource, extraArgs;
        if (force.getNumGlobalParameters() > 0)
            extraArgs << ", __constant float* " << prefix << "globals";
        for (int i = 0; i < (int) params->getBuffers().size(); i++) {
            const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
            string paramName = prefix+"params"+intToString(i+1);
            extraArgs << ", __global " << buffer.getType() << "* global_" << paramName;
        }
        for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
            const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
            string valueName = prefix+"values"+intToString(i+1);
            extraArgs << ", __global " << buffer.getType() << "* global_" << valueName;
            reductionSource << buffer.getType() << " local_" << valueName << ";\n";
        }
        // Store the reduced sum into the first computed value.  The identifier
        // has to carry the prefix so that it names one of the local variables
        // declared just above.
        reductionSource << "local_" << prefix << "values" << computedValues->getParameterSuffix(0) << " = sum;\n";
        for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
            string valueName = prefix+"values"+intToString(i+1);
            reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n";
        }
        map<string, string> replacements;
        replacements["PARAMETER_ARGUMENTS"] = extraArgs.str();
        replacements["COMPUTE_VALUES"] = reductionSource.str();
        map<string, string> defines;
        defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
//        printf("%s\n", cl.loadSourceFromFile("customGBValueReduction.cl", replacements).c_str());
        cl::Program program = cl.createProgram(cl.loadSourceFromFile("customGBValueReduction.cl", replacements), defines);
        reduceValueKernel = cl::Kernel(program, "reduceGBValue");
    }

    // The global parameter values were already uploaded above; here the buffer
    // only needs to be registered with the nonbonded utilities.

    if (globals != NULL)
        cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", sizeof(cl_float), globals->getDeviceBuffer()));
    cl.addForce(new OpenCLCustomGBForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
}
/**
 * Run the pairwise value kernel followed by the reduction kernel.  On the
 * first call the scratch buffer is allocated and the kernel arguments are
 * bound; on every call the global parameters are refreshed from the context
 * and re-uploaded if any of them changed.
 */
void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
    OpenCLNonbondedUtilities& nonbonded = cl.getNonbondedUtilities();
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
        valueBuffers = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*cl.getNumForceBuffers(), "customGBValueBuffers");

        // Bind the arguments of the pairwise value kernel.  The order has to
        // follow the kernel's parameter list, so it must not be rearranged.

        int pairArg = 0;
        pairValueKernel.setArg<cl::Buffer>(pairArg++, cl.getPosq().getDeviceBuffer());
        pairValueKernel.setArg(pairArg++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
        pairValueKernel.setArg<cl::Buffer>(pairArg++, valueBuffers->getDeviceBuffer());
        pairValueKernel.setArg(pairArg++, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
        pairValueKernel.setArg(pairArg++, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
        if (!nonbonded.getUseCutoff()) {
            // Without a cutoff the kernel is given the full tile list and its length.
            pairValueKernel.setArg<cl::Buffer>(pairArg++, nonbonded.getTiles().getDeviceBuffer());
            pairValueKernel.setArg<cl_uint>(pairArg++, nonbonded.getTiles().getSize());
        }
        else {
            // With a cutoff it is given the interacting tiles, flags and count instead.
            pairValueKernel.setArg<cl::Buffer>(pairArg++, nonbonded.getInteractingTiles().getDeviceBuffer());
            pairValueKernel.setArg<cl::Buffer>(pairArg++, nonbonded.getInteractionFlags().getDeviceBuffer());
            pairValueKernel.setArg<cl::Buffer>(pairArg++, nonbonded.getInteractionCount().getDeviceBuffer());
        }
        if (globals != NULL)
            pairValueKernel.setArg<cl::Buffer>(pairArg++, globals->getDeviceBuffer());
        for (int i = 0; i < (int) params->getBuffers().size(); i++) {
            // Each parameter buffer is passed as a buffer argument followed by
            // a local-memory argument sized for one thread block.
            const OpenCLNonbondedUtilities::ParameterInfo& info = params->getBuffers()[i];
            pairValueKernel.setArg<cl::Buffer>(pairArg++, info.getBuffer());
            pairValueKernel.setArg(pairArg++, OpenCLContext::ThreadBlockSize*info.getSize(), NULL);
        }

        // Bind the arguments of the reduction kernel.

        int reduceArg = 0;
        reduceValueKernel.setArg<cl_int>(reduceArg++, cl.getPaddedNumAtoms());
        reduceValueKernel.setArg<cl_int>(reduceArg++, nonbonded.getNumForceBuffers());
        reduceValueKernel.setArg<cl::Buffer>(reduceArg++, valueBuffers->getDeviceBuffer());
        if (globals != NULL)
            reduceValueKernel.setArg<cl::Buffer>(reduceArg++, globals->getDeviceBuffer());
        for (int i = 0; i < (int) params->getBuffers().size(); i++)
            reduceValueKernel.setArg<cl::Buffer>(reduceArg++, params->getBuffers()[i].getBuffer());
        for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
            reduceValueKernel.setArg<cl::Buffer>(reduceArg++, computedValues->getBuffers()[i].getBuffer());
    }

    // Pick up the current global parameter values, uploading only on a change.

    if (globals != NULL) {
        bool needsUpload = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            const cl_float current = (cl_float) context.getParameter(globalParamNames[i]);
            if (current != globalParamValues[i])
                needsUpload = true;
            globalParamValues[i] = current;
        }
        if (needsUpload)
            globals->upload(globalParamValues);
    }

    // Clear the scratch buffer, then compute the pairwise values and reduce them.

    cl.clearBuffer(*valueBuffers);
    cl.executeKernel(pairValueKernel, nonbonded.getTiles().getSize()*OpenCLContext::TileSize);
    cl.executeKernel(reduceValueKernel, cl.getPaddedNumAtoms());
}
// Runs the same kernels as executeForces() and then reports an energy of zero.
// NOTE(review): no energy is actually computed here; given that the commit is
// described as the beginnings of the implementation, this is presumably a
// placeholder - confirm before relying on the returned value.
double OpenCLCalcCustomGBForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLCustomExternalForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomExternalForceInfo(const CustomExternalForce& force, int numParticles) : OpenCLForceInfo(1), force(force), indices(numParticles, -1) {
......
......@@ -470,6 +470,50 @@ private:
cl::Kernel reduceBornForceKernel;
};
/**
* This kernel is invoked by CustomGBForce to calculate the forces acting on the system.
*/
class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public:
OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cl(cl), params(NULL), globals(NULL), valueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) {
}
~OpenCLCalcCustomGBForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomGBForce this kernel will be used for
*/
void initialize(const System& system, const CustomGBForce& force);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void executeForces(ContextImpl& context);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the CustomGBForce
*/
double executeEnergy(ContextImpl& context);
private:
bool hasInitializedKernels;
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLParameterSet* computedValues;
OpenCLArray<cl_float>* globals;
OpenCLArray<cl_float>* valueBuffers;
OpenCLArray<mm_float4>* tabulatedFunctionParams;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions;
System& system;
cl::Kernel pairValueKernel, reduceValueKernel, pairForceKernel, particleForceKernel;
};
/**
* This kernel is invoked by CustomExternalForce to calculate the forces acting on the system and the energy of the system.
*/
......
......@@ -70,21 +70,79 @@ OpenCLParameterSet::~OpenCLParameterSet() {
delete &buffers[i].getBuffer();
}
void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& values) {
/**
 * Download the current values of all parameters from the device.
 *
 * @param values on exit, values[i][j] contains the value of parameter j for object i
 * @throws OpenMMException if a buffer has an unrecognized type or the download fails
 */
void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) const {
    values.resize(numObjects);
    for (int i = 0; i < numObjects; i++)
        values[i].resize(numParameters);
    try {
        // "base" is the index of the first parameter held by the current buffer.
        int base = 0;
        for (int i = 0; i < (int) buffers.size(); i++) {
            if (buffers[i].getType() == "float4") {
                vector<mm_float4> data(numObjects);
                context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
                for (int j = 0; j < numObjects; j++) {
                    // Components beyond the last parameter are unused and skipped.
                    values[j][base] = data[j].x;
                    if (base+1 < numParameters)
                        values[j][base+1] = data[j].y;
                    if (base+2 < numParameters)
                        values[j][base+2] = data[j].z;
                    if (base+3 < numParameters)
                        values[j][base+3] = data[j].w;
                }
                base += 4;
            }
            else if (buffers[i].getType() == "float2") {
                vector<mm_float2> data(numObjects);
                context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
                for (int j = 0; j < numObjects; j++) {
                    values[j][base] = data[j].x;
                    if (base+1 < numParameters)
                        values[j][base+1] = data[j].y;
                }
                base += 2;
            }
            else if (buffers[i].getType() == "float") {
                vector<cl_float> data(numObjects);
                context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
                for (int j = 0; j < numObjects; j++)
                    values[j][base] = data[j];
                // Advance past this parameter, as the other branches do for theirs.
                base += 1;
            }
            else
                throw OpenMMException("Internal error: Unknown buffer type in OpenCLParameterSet");
        }
    }
    catch (const cl::Error& err) {
        stringstream str;
        str<<"Error downloading parameter set "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
        throw OpenMMException(str.str());
    }
}
void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& values) {
try {
int base = 0;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getType() == "float4") {
vector<mm_float4> data(numObjects);
for (int j = 0; j < numObjects; j++) {
data[j].x = values[j][base];
if (base+1 < numParameters)
data[j].y = values[j][base+1];
if (base+2 < numParameters)
data[j].z = values[j][base+2];
if (base+3 < numParameters)
data[j].w = values[j][base+3];
}
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 4;
}
else if (buffers[i].getType() == "float2") {
vector<mm_float2> data(numObjects);
for (int j = 0; j < numObjects; j++)
data[j] = (mm_float2) {values[j][base], values[j][base+1]};
for (int j = 0; j < numObjects; j++) {
data[j].x = values[j][base];
if (base+1 < numParameters)
data[j].y = values[j][base+1];
}
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 2;
}
......
......@@ -64,6 +64,12 @@ public:
int getNumObjects() const {
// This is the same count getParameterValues() uses for the number of rows in its output.
return numObjects;
}
/**
* Get the values of all parameters by downloading them from the device buffers.
*
* The output is resized by this call, so the caller does not need to size it beforehand.
* The download uses blocking reads, so the values are valid as soon as the call returns.
*
* @param values on exit, values[i][j] contains the value of parameter j for object i;
*               the outer vector has getNumObjects() entries
* @throws OpenMMException if an OpenCL read fails or a buffer has an unknown type
*/
void getParameterValues(std::vector<std::vector<cl_float> >& values) const;
/**
* Set the values of all parameters.
*
......
......@@ -54,6 +54,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
......
// Number of atoms along each side of an interaction tile; also the size of the work-item
// group ("warp" below) that cooperates on one tile.
#define TILE_SIZE 32
/**
* Compute a value based on pair interactions.
*
* The atom pairs are processed in tiles of TILE_SIZE x TILE_SIZE atoms. The work-items are
* split into groups of TILE_SIZE ("warps"); each group handles a contiguous slice of the
* tile list. Within a tile, work-item tgx owns atom x+tgx ("atom1") and loops over the
* atoms of the y block ("atom2"), which are staged in local memory.
*
* For every pair, the COMPUTE_VALUE macro is expected to set tempValue1 (contribution to
* atom1) and tempValue2 (contribution to atom2). Contributions are accumulated into
* global_value, which holds several buffers of PADDED_NUM_ATOMS entries each.
*
* PARAMETER_ARGUMENTS, LOAD_ATOM1_PARAMETERS, LOAD_ATOM2_PARAMETERS,
* LOAD_LOCAL_PARAMETERS_FROM_1, LOAD_LOCAL_PARAMETERS_FROM_GLOBAL, COMPUTE_VALUE, NUM_ATOMS,
* PADDED_NUM_ATOMS and the PERIODIC_BOX_SIZE_* constants are not defined here; presumably
* they are substituted by the host code when the program is built (confirm against caller).
*
* NOTE(review): exclusions and exclusionIndices are used under USE_EXCLUSIONS but do not
* appear in the visible argument list; presumably they arrive via PARAMETER_ARGUMENTS.
*/
__kernel void computeN2Value(__global float4* posq, __local float4* local_posq, __global float* global_value,
__local float* local_value, __local float* tempBuffer, __global unsigned int* tiles,
#ifdef USE_CUTOFF
__global unsigned int* interactionFlags, __global unsigned int* interactionCount
#else
unsigned int numTiles
#endif
PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF
// With a cutoff the number of tiles to process is read from device memory instead of
// being passed as an argument.
unsigned int numTiles = interactionCount[0];
#endif
// Split the tile list evenly between the groups of TILE_SIZE work-items: this group
// processes tiles [pos, end).
unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE;
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
// NOTE(review): energy is not referenced anywhere in the visible code (the macros may use it).
float energy = 0.0f;
// y block currently cached in local memory; 0xFFFFFFFF means "none yet".
unsigned int lasty = 0xFFFFFFFF;
while (pos < end) {
// Extract the coordinates of this tile
// Layout of a tile word as decoded here: bit 0 = tile has exclusions,
// bits 2-16 = y tile index, bits 17 and up = x tile index.
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff)*TILE_SIZE;
bool hasExclusions = (x & 0x1);
x = (x>>17)*TILE_SIZE;
// tgx = index of this work-item within its group; tbx = local id of the group's first work-item.
unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
unsigned int tbx = get_local_id(0) - tgx;
unsigned int atom1 = x + tgx;
// Accumulates the contributions to atom1 from this tile.
float value = 0.0f;
float4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
if (x == y) {
// This tile is on the diagonal.
// Both sides of the tile are the same atoms, so the local copy is filled from the
// data this work-item already loaded for atom1.
local_posq[get_local_id(0)] = posq1;
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned int xi = x/TILE_SIZE;
// Index of the tile (xi, xi) in a packed triangular ordering of tiles.
unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2;
#ifdef USE_EXCLUSIONS
// One exclusion word per atom1; bit j is tested for atom2 = j (a cleared bit means excluded).
unsigned int excl = exclusions[exclusionIndices[tile]+tgx];
#endif
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
// atom2 is first a local-memory index, then (below) it becomes the global atom index.
int atom2 = tbx+j;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
// Wrap each component of the displacement by the nearest multiple of the box size.
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float tempValue1 = 0.0f;
float tempValue2 = 0.0f;
// Skip padding atoms (index >= NUM_ATOMS) and the self pair.
#ifdef USE_EXCLUSIONS
if (!isExcluded && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#else
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#endif
COMPUTE_VALUE
}
// Only tempValue1 is used on the diagonal: every ordered pair (atom1, atom2) is
// visited by the work-item owning atom1, so tempValue2 is not accumulated here.
value += tempValue1;
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
}
// Write results
// The output buffer is selected by tile index or by warp index, depending on
// USE_OUTPUT_BUFFER_PER_BLOCK.
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else
unsigned int offset = x + tgx + warp*PADDED_NUM_ATOMS;
#endif
global_value[offset] += value;
}
else {
// This is an off-diagonal tile.
// Reload the y block into local memory only if it differs from the one used by the
// previous off-diagonal tile.
if (lasty != y) {
unsigned int j = y + tgx;
local_posq[get_local_id(0)] = posq[j];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
// local_value[k] accumulates the contributions to atom y+k from this tile.
local_value[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
// Bit j of flags is set when column j (atom y+j) of this tile must be computed;
// 0xFFFFFFFF (all columns) and tiles with exclusions take the full path below.
unsigned int flags = interactionFlags[pos];
if (!hasExclusions && flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
// Here all work-items of the group process the same column j at the same time.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float tempValue1 = 0.0f;
float tempValue2 = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_VALUE
}
value += tempValue1;
tempBuffer[get_local_id(0)] = tempValue2;
// Sum the tempValue2 contributions to atom2 over the group's work-items with a
// pairwise tree (strides 1, 2, 4, 8, 16); the strides are written for TILE_SIZE == 32.
// NOTE(review): there is no barrier between these steps; presumably this relies on
// the TILE_SIZE work-items of a group executing in lockstep -- confirm.
if (tgx % 2 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
if (tgx % 4 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
if (tgx % 8 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
if (tgx % 16 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
// The first work-item of the group adds the two half sums into atom2's accumulator.
if (tgx == 0)
local_value[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned int xi = x/TILE_SIZE;
unsigned int yi = y/TILE_SIZE;
// Index of the tile (xi, yi) in the same packed triangular ordering used for diagonal tiles.
unsigned int tile = xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*(yi+1)/2;
#ifdef USE_EXCLUSIONS
unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF);
// Rotate the exclusion word right by tgx so that bit 0 matches this work-item's
// starting column (tj = tgx). For tgx == 0 the left shift count equals the word
// width; OpenCL C takes shift counts modulo the width, which leaves excl unchanged.
excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
#endif
// At step j work-item tgx handles column (tgx+j) mod TILE_SIZE, so within one step
// no two work-items of the group address the same local_value entry.
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+tj;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+tj;
float tempValue1 = 0.0f;
float tempValue2 = 0.0f;
#ifdef USE_EXCLUSIONS
if (!isExcluded && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#else
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif
COMPUTE_VALUE
}
value += tempValue1;
local_value[tbx+tj] += tempValue2;
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
// Write results
// offset1 receives the contributions to the x-block atom owned by this work-item,
// offset2 the contributions accumulated in local memory for atom y+tgx.
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset1 = x + tgx + (y/TILE_SIZE)*PADDED_NUM_ATOMS;
unsigned int offset2 = y + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else
unsigned int offset1 = x + tgx + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = y + tgx + warp*PADDED_NUM_ATOMS;
#endif
global_value[offset1] += value;
global_value[offset2] += local_value[get_local_id(0)];
// Remember which y block is cached so the next tile can skip the reload.
lasty = y;
}
pos++;
}
}
/**
* Reduce a pairwise computed value, and compute per-particle values.
*
* valueBuffers is read as numBuffers consecutive buffers of bufferSize entries each. For
* every atom index below NUM_ATOMS, the entries at that index in all buffers are summed
* into the local variable sum, and then the code substituted for COMPUTE_VALUES runs.
*
* NOTE(review): sum is not stored anywhere in the visible code; presumably the
* COMPUTE_VALUES expansion consumes it (and index) and writes through the arrays supplied
* by PARAMETER_ARGUMENTS -- confirm against the host code that defines these macros.
*
* @param bufferSize    number of entries in each buffer (stride between buffers)
* @param numBuffers    number of buffers to add together
* @param valueBuffers  the buffers holding the partial pairwise values
*/
__kernel void reduceGBValue(int bufferSize, int numBuffers, __global float* valueBuffers
PARAMETER_ARGUMENTS) {
// Each work-item starts at its global id and advances by the global size, so any
// number of work-items covers all NUM_ATOMS atoms.
unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) {
// Reduce the pairwise value
int totalSize = bufferSize*numBuffers;
// Start from the first buffer's entry, then add the same slot of each later buffer.
float sum = valueBuffers[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += valueBuffers[i];
// Now calculate other values
COMPUTE_VALUES
index += get_global_size(0);
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment