"ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "68b065e5424f4c158c84a26cd0c9de980218126e"
Commit ff238051 authored by peastman's avatar peastman
Browse files

OpenCL implementation of vector functions for CustomIntegrator

parent ad285083
...@@ -1484,9 +1484,8 @@ class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel { ...@@ -1484,9 +1484,8 @@ class OpenCLIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
public: public:
enum GlobalTargetType {DT, VARIABLE, PARAMETER}; enum GlobalTargetType {DT, VARIABLE, PARAMETER};
OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl), OpenCLIntegrateCustomStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateCustomStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) { hasInitializedKernels(false), needsEnergyParamDerivs(false) {
} }
~OpenCLIntegrateCustomStepKernel();
/** /**
* Initialize the kernel. * Initialize the kernel.
* *
...@@ -1550,7 +1549,7 @@ private: ...@@ -1550,7 +1549,7 @@ private:
class ReorderListener; class ReorderListener;
class GlobalTarget; class GlobalTarget;
class DerivFunction; class DerivFunction;
std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, std::string createPerDofComputation(const std::string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator,
const std::string& forceName, const std::string& energyName, std::vector<const TabulatedFunction*>& functions, const std::string& forceName, const std::string& energyName, std::vector<const TabulatedFunction*>& functions,
std::vector<std::pair<std::string, std::string> >& functionNames); std::vector<std::pair<std::string, std::string> >& functionNames);
void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid); void prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid);
...@@ -1563,21 +1562,21 @@ private: ...@@ -1563,21 +1562,21 @@ private:
double energy; double energy;
float energyFloat; float energyFloat;
int numGlobalVariables, sumWorkGroupSize; int numGlobalVariables, sumWorkGroupSize;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs; bool hasInitializedKernels, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
mutable bool localValuesAreCurrent; std::vector<bool> deviceValuesAreCurrent;
mutable std::vector<bool> localValuesAreCurrent;
OpenCLArray globalValues; OpenCLArray globalValues;
OpenCLArray sumBuffer; OpenCLArray sumBuffer;
OpenCLArray summedValue; OpenCLArray summedValue;
OpenCLArray uniformRandoms; OpenCLArray uniformRandoms;
OpenCLArray randomSeed; OpenCLArray randomSeed;
OpenCLArray perDofEnergyParamDerivs; OpenCLArray perDofEnergyParamDerivs;
std::vector<OpenCLArray> tabulatedFunctions; std::vector<OpenCLArray> tabulatedFunctions, perDofValues;
std::map<int, double> savedEnergy; std::map<int, double> savedEnergy;
std::map<int, OpenCLArray> savedForces; std::map<int, OpenCLArray> savedForces;
std::set<int> validSavedForces; std::set<int> validSavedForces;
OpenCLParameterSet* perDofValues; mutable std::vector<std::vector<mm_float4> > localPerDofValuesFloat;
mutable std::vector<std::vector<cl_float> > localPerDofValuesFloat; mutable std::vector<std::vector<mm_double4> > localPerDofValuesDouble;
mutable std::vector<std::vector<cl_double> > localPerDofValuesDouble;
std::map<std::string, double> energyParamDerivs; std::map<std::string, double> energyParamDerivs;
std::vector<std::string> perDofEnergyParamDerivNames; std::vector<std::string> perDofEnergyParamDerivNames;
std::vector<cl_float> localPerDofEnergyParamDerivsFloat; std::vector<cl_float> localPerDofEnergyParamDerivsFloat;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. * * Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -137,6 +137,71 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre ...@@ -137,6 +137,71 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.z*periodicDistance_rinv : 0);\n"; out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.z*periodicDistance_rinv : 0);\n";
} }
} }
else if (node.getOperation().getName() == "dot") {
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
string child1 = getTempName(node.getChildren()[0], temps);
string child2 = getTempName(node.getChildren()[1], temps);
if (derivOrder[0] == 0 && derivOrder[1] == 0)
out << nodeNames[j] << " = dot(" << child1 << ", " << child2 << ");\n";
else
throw OpenMMException("Unsupported derivative order for cross()");
}
}
else if (node.getOperation().getName() == "cross") {
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
string child1 = getTempName(node.getChildren()[0], temps);
string child2 = getTempName(node.getChildren()[1], temps);
if (derivOrder[0] == 0 && derivOrder[1] == 0)
out << nodeNames[j] << " = cross(" << child1 << ", " << child2 << ");\n";
else
throw OpenMMException("Unsupported derivative order for cross()");
}
}
else if (node.getOperation().getName() == "vector") {
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
out << nodeNames[j] << ".x = " << getTempName(node.getChildren()[0], temps) << ".x;\n";
out << nodeNames[j] << ".y = " << getTempName(node.getChildren()[1], temps) << ".y;\n";
out << nodeNames[j] << ".z = " << getTempName(node.getChildren()[2], temps) << ".z;\n";
}
else if (derivOrder[0] == 1 && derivOrder[1] == 0 && derivOrder[2] == 0)
out << nodeNames[j] << ".x = 1;\n";
else if (derivOrder[0] == 0 && derivOrder[1] == 1 && derivOrder[2] == 0)
out << nodeNames[j] << ".y = 1;\n";
else if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 1)
out << nodeNames[j] << ".z = 1;\n";
}
}
else if (node.getOperation().getName() == "_x") {
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0)
out << nodeNames[j] << " = " << getTempName(node.getChildren()[0], temps) << ".x;\n";
else
throw OpenMMException("Unsupported derivative order for _x()");
}
}
else if (node.getOperation().getName() == "_y") {
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0)
out << nodeNames[j] << " = " << getTempName(node.getChildren()[0], temps) << ".y;\n";
else
throw OpenMMException("Unsupported derivative order for _y()");
}
}
else if (node.getOperation().getName() == "_z") {
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0)
out << nodeNames[j] << " = " << getTempName(node.getChildren()[0], temps) << ".z;\n";
else
throw OpenMMException("Unsupported derivative order for _z()");
}
}
else { else {
// This is a tabulated function. // This is a tabulated function.
...@@ -150,172 +215,185 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre ...@@ -150,172 +215,185 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
paramsFloat.push_back(context.doubleToString(functionParams[i][j])); paramsFloat.push_back(context.doubleToString(functionParams[i][j]));
paramsInt.push_back(context.intToString((int) functionParams[i][j])); paramsInt.push_back(context.intToString((int) functionParams[i][j]));
} }
if (dynamic_cast<const Continuous1DFunction*>(functions[i]) != NULL) { vector<string> suffixes;
out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n"; if (tempType[tempType.size()-1] == '3') {
out << "if (x >= " << paramsFloat[0] << " && x <= " << paramsFloat[1] << ") {\n"; suffixes.push_back(".x");
out << "x = (x - " << paramsFloat[0] << ")*" << paramsFloat[2] << ";\n"; suffixes.push_back(".y");
out << "int index = (int) (floor(x));\n"; suffixes.push_back(".z");
out << "index = min(index, " << paramsInt[3] << ");\n";
out << "float4 coeff = " << functionNames[i].second << "[index];\n";
out << "real b = x-index;\n";
out << "real a = 1.0f-b;\n";
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0)
out << nodeNames[j] << " = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(" << paramsFloat[2] << "*" << paramsFloat[2] << ");\n";
else
out << nodeNames[j] << " = (coeff.y-coeff.x)*" << paramsFloat[2] << "+((1.0f-3.0f*a*a)*coeff.z+(3.0f*b*b-1.0f)*coeff.w)/" << paramsFloat[2] << ";\n";
}
out << "}\n";
} }
else if (dynamic_cast<const Continuous2DFunction*>(functions[i]) != NULL) { else {
out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n"; suffixes.push_back("");
out << "real y = " << getTempName(node.getChildren()[1], temps) << ";\n";
out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
out << "float4 c[4];\n";
for (int j = 0; j < 4; j++)
out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
out << "real da = x-s;\n";
out << "real db = y-t;\n";
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0 && derivOrder[1] == 0) {
out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;\n";
out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;\n";
out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;\n";
out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;\n";
}
else if (derivOrder[0] == 1 && derivOrder[1] == 0) {
out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;\n";
out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;\n";
out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;\n";
out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;\n";
out << nodeNames[j] << " *= " << paramsFloat[6] << ";\n";
}
else if (derivOrder[0] == 0 && derivOrder[1] == 1) {
out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;\n";
out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;\n";
out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;\n";
out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;\n";
out << nodeNames[j] << " *= " << paramsFloat[7] << ";\n";
}
else
throw OpenMMException("Unsupported derivative order for Continuous2DFunction");
}
out << "}\n";
} }
else if (dynamic_cast<const Continuous3DFunction*>(functions[i]) != NULL) { for (auto& suffix : suffixes) {
out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n"; out << "{\n";
out << "real y = " << getTempName(node.getChildren()[1], temps) << ";\n"; if (dynamic_cast<const Continuous1DFunction*>(functions[i]) != NULL) {
out << "real z = " << getTempName(node.getChildren()[2], temps) << ";\n"; out << "real x = " << getTempName(node.getChildren()[0], temps) << suffix <<";\n";
out << "if (x >= " << paramsFloat[3] << " && x <= " << paramsFloat[4] << " && y >= " << paramsFloat[5] << " && y <= " << paramsFloat[6] << " && z >= " << paramsFloat[7] << " && z <= " << paramsFloat[8] << ") {\n"; out << "if (x >= " << paramsFloat[0] << " && x <= " << paramsFloat[1] << ") {\n";
out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n"; out << "x = (x - " << paramsFloat[0] << ")*" << paramsFloat[2] << ";\n";
out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n"; out << "int index = (int) (floor(x));\n";
out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n"; out << "index = min(index, " << paramsInt[3] << ");\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n"; out << "float4 coeff = " << functionNames[i].second << "[index];\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n"; out << "real b = x-index;\n";
out << "int u = min((int) floor(z), " << paramsInt[2] << "-1);\n"; out << "real a = 1.0f-b;\n";
out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n"; for (int j = 0; j < nodes.size(); j++) {
out << "float4 c[16];\n"; const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
for (int j = 0; j < 16; j++) if (derivOrder[0] == 0)
out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n"; out << nodeNames[j] << suffix <<" = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(" << paramsFloat[2] << "*" << paramsFloat[2] << ");\n";
out << "real da = x-s;\n"; else
out << "real db = y-t;\n"; out << nodeNames[j] << suffix <<" = (coeff.y-coeff.x)*" << paramsFloat[2] << "+((1.0f-3.0f*a*a)*coeff.z+(3.0f*b*b-1.0f)*coeff.w)/" << paramsFloat[2] << ";\n";
out << "real dc = z-u;\n";
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
out << "real value[4] = {0, 0, 0, 0};\n";
for (int k = 3; k >= 0; k--)
for (int m = 0; m < 4; m++) {
int base = k + 4*m;
out << "value[" << m << "] = db*value[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
}
out << nodeNames[j] << " = value[0] + dc*(value[1] + dc*(value[2] + dc*value[3]));\n";
}
else if (derivOrder[0] == 1 && derivOrder[1] == 0 && derivOrder[2] == 0) {
out << "real derivx[4] = {0, 0, 0, 0};\n";
for (int k = 3; k >= 0; k--)
for (int m = 0; m < 4; m++) {
int base = k + 4*m;
out << "derivx[" << m << "] = db*derivx[" << m << "] + (3*c[" << base << "].w*da + 2*c[" << base << "].z)*da + c[" << base << "].y;\n";
}
out << nodeNames[j] << " = derivx[0] + dc*(derivx[1] + dc*(derivx[2] + dc*derivx[3]));\n";
out << nodeNames[j] << " *= " << paramsFloat[9] << ";\n";
} }
else if (derivOrder[0] == 0 && derivOrder[1] == 1 && derivOrder[2] == 0) { out << "}\n";
const string suffixes[] = {".x", ".y", ".z", ".w"}; }
out << "real derivy[4] = {0, 0, 0, 0};\n"; else if (dynamic_cast<const Continuous2DFunction*>(functions[i]) != NULL) {
for (int k = 3; k >= 0; k--) out << "real x = " << getTempName(node.getChildren()[0], temps) << suffix <<";\n";
for (int m = 0; m < 4; m++) { out << "real y = " << getTempName(node.getChildren()[1], temps) << suffix <<";\n";
int base = 4*m; out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
string suffix = suffixes[k]; out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
out << "derivy[" << m << "] = da*derivy[" << m << "] + (3*c[" << (base+3) << "]" << suffix << "*db + 2*c[" << (base+2) << "]" << suffix << ")*db + c[" << (base+1) << "]" << suffix << ";\n"; out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
} out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << nodeNames[j] << " = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));\n"; out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << nodeNames[j] << " *= " << paramsFloat[10] << ";\n"; out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
out << "float4 c[4];\n";
for (int j = 0; j < 4; j++)
out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
out << "real da = x-s;\n";
out << "real db = y-t;\n";
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0 && derivOrder[1] == 0) {
out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;\n";
out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;\n";
out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;\n";
out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;\n";
}
else if (derivOrder[0] == 1 && derivOrder[1] == 0) {
out << nodeNames[j] << suffix << " = db*" << nodeNames[j] << suffix << " + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;\n";
out << nodeNames[j] << suffix << " = db*" << nodeNames[j] << suffix << " + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;\n";
out << nodeNames[j] << suffix << " = db*" << nodeNames[j] << suffix << " + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;\n";
out << nodeNames[j] << suffix << " = db*" << nodeNames[j] << suffix << " + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;\n";
out << nodeNames[j] << suffix << " *= " << paramsFloat[6] << ";\n";
}
else if (derivOrder[0] == 0 && derivOrder[1] == 1) {
out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;\n";
out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;\n";
out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;\n";
out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;\n";
out << nodeNames[j] << suffix << " *= " << paramsFloat[7] << ";\n";
}
else
throw OpenMMException("Unsupported derivative order for Continuous2DFunction");
} }
else if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 1) { out << "}\n";
out << "real derivz[4] = {0, 0, 0, 0};\n"; }
for (int k = 3; k >= 0; k--) else if (dynamic_cast<const Continuous3DFunction*>(functions[i]) != NULL) {
for (int m = 0; m < 4; m++) { out << "real x = " << getTempName(node.getChildren()[0], temps) << suffix <<";\n";
int base = k + 4*m; out << "real y = " << getTempName(node.getChildren()[1], temps) << suffix <<";\n";
out << "derivz[" << m << "] = db*derivz[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n"; out << "real z = " << getTempName(node.getChildren()[2], temps) << suffix <<";\n";
} out << "if (x >= " << paramsFloat[3] << " && x <= " << paramsFloat[4] << " && y >= " << paramsFloat[5] << " && y <= " << paramsFloat[6] << " && z >= " << paramsFloat[7] << " && z <= " << paramsFloat[8] << ") {\n";
out << nodeNames[j] << " = derivz[1] + dc*(2*derivz[2] + dc*3*derivz[3]);\n"; out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n";
out << nodeNames[j] << " *= " << paramsFloat[11] << ";\n"; out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n";
out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n";
out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
out << "int u = min((int) floor(z), " << paramsInt[2] << "-1);\n";
out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n";
out << "float4 c[16];\n";
for (int j = 0; j < 16; j++)
out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
out << "real da = x-s;\n";
out << "real db = y-t;\n";
out << "real dc = z-u;\n";
for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
out << "real value[4] = {0, 0, 0, 0};\n";
for (int k = 3; k >= 0; k--)
for (int m = 0; m < 4; m++) {
int base = k + 4*m;
out << "value[" << m << "] = db*value[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
}
out << nodeNames[j] << suffix << " = value[0] + dc*(value[1] + dc*(value[2] + dc*value[3]));\n";
}
else if (derivOrder[0] == 1 && derivOrder[1] == 0 && derivOrder[2] == 0) {
out << "real derivx[4] = {0, 0, 0, 0};\n";
for (int k = 3; k >= 0; k--)
for (int m = 0; m < 4; m++) {
int base = k + 4*m;
out << "derivx[" << m << "] = db*derivx[" << m << "] + (3*c[" << base << "].w*da + 2*c[" << base << "].z)*da + c[" << base << "].y;\n";
}
out << nodeNames[j] << suffix << " = derivx[0] + dc*(derivx[1] + dc*(derivx[2] + dc*derivx[3]));\n";
out << nodeNames[j] << suffix << " *= " << paramsFloat[9] << ";\n";
}
else if (derivOrder[0] == 0 && derivOrder[1] == 1 && derivOrder[2] == 0) {
const string suffixes[] = {".x", ".y", ".z", ".w"};
out << "real derivy[4] = {0, 0, 0, 0};\n";
for (int k = 3; k >= 0; k--)
for (int m = 0; m < 4; m++) {
int base = 4*m;
string suffix = suffixes[k];
out << "derivy[" << m << "] = da*derivy[" << m << "] + (3*c[" << (base+3) << "]" << suffix << "*db + 2*c[" << (base+2) << "]" << suffix << ")*db + c[" << (base+1) << "]" << suffix << ";\n";
}
out << nodeNames[j] << suffix << " = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));\n";
out << nodeNames[j] << suffix << " *= " << paramsFloat[10] << ";\n";
}
else if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 1) {
out << "real derivz[4] = {0, 0, 0, 0};\n";
for (int k = 3; k >= 0; k--)
for (int m = 0; m < 4; m++) {
int base = k + 4*m;
out << "derivz[" << m << "] = db*derivz[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
}
out << nodeNames[j] << suffix << " = derivz[1] + dc*(2*derivz[2] + dc*3*derivz[3]);\n";
out << nodeNames[j] << suffix << " *= " << paramsFloat[11] << ";\n";
}
else
throw OpenMMException("Unsupported derivative order for Continuous3DFunction");
} }
else out << "}\n";
throw OpenMMException("Unsupported derivative order for Continuous3DFunction");
} }
out << "}\n"; else if (dynamic_cast<const Discrete1DFunction*>(functions[i]) != NULL) {
} for (int j = 0; j < nodes.size(); j++) {
else if (dynamic_cast<const Discrete1DFunction*>(functions[i]) != NULL) { const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
for (int j = 0; j < nodes.size(); j++) { if (derivOrder[0] == 0) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder(); out << "real x = " << getTempName(node.getChildren()[0], temps) << suffix <<";\n";
if (derivOrder[0] == 0) { out << "if (x >= 0 && x < " << paramsInt[0] << ") {\n";
out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n"; out << "int index = (int) floor(x+0.5f);\n";
out << "if (x >= 0 && x < " << paramsInt[0] << ") {\n"; out << nodeNames[j] << suffix << " = " << functionNames[i].second << "[index];\n";
out << "int index = (int) floor(x+0.5f);\n"; out << "}\n";
out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n"; }
out << "}\n";
} }
} }
} else if (dynamic_cast<const Discrete2DFunction*>(functions[i]) != NULL) {
else if (dynamic_cast<const Discrete2DFunction*>(functions[i]) != NULL) { for (int j = 0; j < nodes.size(); j++) {
for (int j = 0; j < nodes.size(); j++) { const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder(); if (derivOrder[0] == 0 && derivOrder[1] == 0) {
if (derivOrder[0] == 0 && derivOrder[1] == 0) { out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << suffix <<"+0.5f);\n";
out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << "+0.5f);\n"; out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << suffix <<"+0.5f);\n";
out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << "+0.5f);\n"; out << "int xsize = " << paramsInt[0] << ";\n";
out << "int xsize = " << paramsInt[0] << ";\n"; out << "int ysize = " << paramsInt[1] << ";\n";
out << "int ysize = " << paramsInt[1] << ";\n"; out << "int index = x+y*xsize;\n";
out << "int index = x+y*xsize;\n"; out << "if (index >= 0 && index < xsize*ysize)\n";
out << "if (index >= 0 && index < xsize*ysize)\n"; out << nodeNames[j] << suffix << " = " << functionNames[i].second << "[index];\n";
out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n"; }
} }
} }
} else if (dynamic_cast<const Discrete3DFunction*>(functions[i]) != NULL) {
else if (dynamic_cast<const Discrete3DFunction*>(functions[i]) != NULL) { for (int j = 0; j < nodes.size(); j++) {
for (int j = 0; j < nodes.size(); j++) { const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder(); if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) { out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << suffix <<"+0.5f);\n";
out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << "+0.5f);\n"; out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << suffix <<"+0.5f);\n";
out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << "+0.5f);\n"; out << "int z = (int) floor(" << getTempName(node.getChildren()[2], temps) << suffix <<"+0.5f);\n";
out << "int z = (int) floor(" << getTempName(node.getChildren()[2], temps) << "+0.5f);\n"; out << "int xsize = " << paramsInt[0] << ";\n";
out << "int xsize = " << paramsInt[0] << ";\n"; out << "int ysize = " << paramsInt[1] << ";\n";
out << "int ysize = " << paramsInt[1] << ";\n"; out << "int zsize = " << paramsInt[2] << ";\n";
out << "int zsize = " << paramsInt[2] << ";\n"; out << "int index = x+(y+z*ysize)*xsize;\n";
out << "int index = x+(y+z*ysize)*xsize;\n"; out << "if (index >= 0 && index < xsize*ysize*zsize)\n";
out << "if (index >= 0 && index < xsize*ysize*zsize)\n"; out << nodeNames[j] << suffix << " = " << functionNames[i].second << "[index];\n";
out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n"; }
} }
} }
out << "}\n";
} }
} }
out << "}"; out << "}";
......
...@@ -7501,7 +7501,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::computeKineticEnergy(ContextIm ...@@ -7501,7 +7501,7 @@ double OpenCLIntegrateVariableLangevinStepKernel::computeKineticEnergy(ContextIm
class OpenCLIntegrateCustomStepKernel::ReorderListener : public OpenCLContext::ReorderListener { class OpenCLIntegrateCustomStepKernel::ReorderListener : public OpenCLContext::ReorderListener {
public: public:
ReorderListener(OpenCLContext& cl, OpenCLParameterSet& perDofValues, vector<vector<cl_float> >& localPerDofValuesFloat, vector<vector<cl_double> >& localPerDofValuesDouble, bool& deviceValuesAreCurrent) : ReorderListener(OpenCLContext& cl, vector<OpenCLArray>& perDofValues, vector<vector<mm_float4> >& localPerDofValuesFloat, vector<vector<mm_double4> >& localPerDofValuesDouble, vector<bool>& deviceValuesAreCurrent) :
cl(cl), perDofValues(perDofValues), localPerDofValuesFloat(localPerDofValuesFloat), localPerDofValuesDouble(localPerDofValuesDouble), deviceValuesAreCurrent(deviceValuesAreCurrent) { cl(cl), perDofValues(perDofValues), localPerDofValuesFloat(localPerDofValuesFloat), localPerDofValuesDouble(localPerDofValuesDouble), deviceValuesAreCurrent(deviceValuesAreCurrent) {
int numAtoms = cl.getNumAtoms(); int numAtoms = cl.getNumAtoms();
lastAtomOrder.resize(numAtoms); lastAtomOrder.resize(numAtoms);
...@@ -7511,52 +7511,42 @@ public: ...@@ -7511,52 +7511,42 @@ public:
void execute() { void execute() {
// Reorder the per-DOF variables to reflect the new atom order. // Reorder the per-DOF variables to reflect the new atom order.
if (perDofValues.getNumParameters() == 0) if (perDofValues.size() == 0)
return; return;
int numAtoms = cl.getNumAtoms(); int numAtoms = cl.getNumAtoms();
const vector<int>& order = cl.getAtomIndex(); const vector<int>& order = cl.getAtomIndex();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { for (int index = 0; index < perDofValues.size(); index++) {
if (deviceValuesAreCurrent) if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
perDofValues.getParameterValues(localPerDofValuesDouble); if (deviceValuesAreCurrent[index])
vector<vector<cl_double> > swap(3*numAtoms); perDofValues[index].download(localPerDofValuesDouble[index]);
for (int i = 0; i < numAtoms; i++) { vector<mm_double4> swap(numAtoms);
swap[3*lastAtomOrder[i]] = localPerDofValuesDouble[3*i]; for (int i = 0; i < numAtoms; i++)
swap[3*lastAtomOrder[i]+1] = localPerDofValuesDouble[3*i+1]; swap[lastAtomOrder[i]] = localPerDofValuesDouble[index][i];
swap[3*lastAtomOrder[i]+2] = localPerDofValuesDouble[3*i+2]; for (int i = 0; i < numAtoms; i++)
} localPerDofValuesDouble[index][i] = swap[order[i]];
for (int i = 0; i < numAtoms; i++) { perDofValues[index].upload(localPerDofValuesDouble[index]);
localPerDofValuesDouble[3*i] = swap[3*order[i]];
localPerDofValuesDouble[3*i+1] = swap[3*order[i]+1];
localPerDofValuesDouble[3*i+2] = swap[3*order[i]+2];
}
perDofValues.setParameterValues(localPerDofValuesDouble);
}
else {
if (deviceValuesAreCurrent)
perDofValues.getParameterValues(localPerDofValuesFloat);
vector<vector<cl_float> > swap(3*numAtoms);
for (int i = 0; i < numAtoms; i++) {
swap[3*lastAtomOrder[i]] = localPerDofValuesFloat[3*i];
swap[3*lastAtomOrder[i]+1] = localPerDofValuesFloat[3*i+1];
swap[3*lastAtomOrder[i]+2] = localPerDofValuesFloat[3*i+2];
} }
for (int i = 0; i < numAtoms; i++) { else {
localPerDofValuesFloat[3*i] = swap[3*order[i]]; if (deviceValuesAreCurrent[index])
localPerDofValuesFloat[3*i+1] = swap[3*order[i]+1]; perDofValues[index].download(localPerDofValuesFloat[index]);
localPerDofValuesFloat[3*i+2] = swap[3*order[i]+2]; vector<mm_float4> swap(numAtoms);
for (int i = 0; i < numAtoms; i++)
swap[lastAtomOrder[i]] = localPerDofValuesFloat[index][i];
for (int i = 0; i < numAtoms; i++)
localPerDofValuesFloat[index][i] = swap[order[i]];
perDofValues[index].upload(localPerDofValuesFloat[index]);
} }
perDofValues.setParameterValues(localPerDofValuesFloat); deviceValuesAreCurrent[index] = true;
} }
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
lastAtomOrder[i] = order[i]; lastAtomOrder[i] = order[i];
deviceValuesAreCurrent = true;
} }
private: private:
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet& perDofValues; vector<OpenCLArray>& perDofValues;
vector<vector<cl_float> >& localPerDofValuesFloat; vector<vector<mm_float4> >& localPerDofValuesFloat;
vector<vector<cl_double> >& localPerDofValuesDouble; vector<vector<mm_double4> >& localPerDofValuesDouble;
bool& deviceValuesAreCurrent; vector<bool>& deviceValuesAreCurrent;
vector<int> lastAtomOrder; vector<int> lastAtomOrder;
}; };
...@@ -7581,47 +7571,47 @@ private: ...@@ -7581,47 +7571,47 @@ private:
string param; string param;
}; };
OpenCLIntegrateCustomStepKernel::~OpenCLIntegrateCustomStepKernel() {
if (perDofValues != NULL)
delete perDofValues;
}
void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) { void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const CustomIntegrator& integrator) {
cl.getPlatformData().initializeContexts(system); cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables(); numGlobalVariables = integrator.getNumGlobalVariables();
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
sumBuffer.initialize(cl, ((3*system.getNumParticles()+3)/4)*4, elementSize, "sumBuffer"); sumBuffer.initialize(cl, system.getNumParticles(), elementSize, "sumBuffer");
summedValue.initialize(cl, 1, elementSize, "summedValue"); summedValue.initialize(cl, 1, elementSize, "summedValue");
perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cl.getUseDoublePrecision() || cl.getUseMixedPrecision()); perDofValues.resize(integrator.getNumPerDofVariables());
cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent)); localPerDofValuesFloat.resize(perDofValues.size());
localPerDofValuesDouble.resize(perDofValues.size());
for (int i = 0; i < perDofValues.size(); i++)
perDofValues[i].initialize(cl, system.getNumParticles(), 4*elementSize, "perDofVariables");
localValuesAreCurrent.resize(integrator.getNumPerDofVariables(), false);
deviceValuesAreCurrent.resize(integrator.getNumPerDofVariables(), false);
cl.addReorderListener(new ReorderListener(cl, perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent));
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed()); SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
} }
string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, CustomIntegrator& integrator,
const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) { const string& forceName, const string& energyName, vector<const TabulatedFunction*>& functions, vector<pair<string, string> >& functionNames) {
const string suffixes[] = {".x", ".y", ".z"}; string tempType = (cl.getSupportsDoublePrecision() ? "double3" : "float3");
string suffix = suffixes[component];
map<string, Lepton::ParsedExpression> expressions; map<string, Lepton::ParsedExpression> expressions;
if (variable == "x") if (variable == "x")
expressions["position"+suffix+" = "] = expr; expressions["position.xyz = "] = expr;
else if (variable == "v") else if (variable == "v")
expressions["velocity"+suffix+" = "] = expr; expressions["velocity.xyz = "] = expr;
else if (variable == "") else if (variable == "")
expressions["sum[3*index+"+cl.intToString(component)+"] = "] = expr; expressions[tempType+" tempSum = "] = expr;
else { else {
for (int i = 0; i < integrator.getNumPerDofVariables(); i++) for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
if (variable == integrator.getPerDofVariableName(i)) if (variable == integrator.getPerDofVariableName(i))
expressions["perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i)+" = "] = expr; expressions["perDof"+cl.intToString(i)+" = "] = expr;
} }
if (expressions.size() == 0) if (expressions.size() == 0)
throw OpenMMException("Unknown per-DOF variable: "+variable); throw OpenMMException("Unknown per-DOF variable: "+variable);
map<string, string> variables; map<string, string> variables;
variables["x"] = "position"+suffix; variables["x"] = "position.xyz";
variables["v"] = "velocity"+suffix; variables["v"] = "velocity.xyz";
variables[forceName] = "f"+suffix; variables[forceName] = "f.xyz";
variables["gaussian"] = "gaussian"+suffix; variables["gaussian"] = "gaussian.xyz";
variables["uniform"] = "uniform"+suffix; variables["uniform"] = "uniform.xyz";
variables["m"] = "mass"; variables["m"] = "mass";
variables["dt"] = "stepSize"; variables["dt"] = "stepSize";
if (energyName != "") if (energyName != "")
...@@ -7629,15 +7619,17 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va ...@@ -7629,15 +7619,17 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
for (int i = 0; i < integrator.getNumGlobalVariables(); i++) for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(globalVariableIndex[i])+"]"; variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(globalVariableIndex[i])+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++) for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i); variables[integrator.getPerDofVariableName(i)] = "perDof"+cl.intToString(i);
for (int i = 0; i < (int) parameterNames.size(); i++) for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]"; variables[parameterNames[i]] = "globals["+cl.intToString(parameterVariableIndex[i])+"]";
string tempType = (cl.getSupportsDoublePrecision() ? "double" : "float");
vector<pair<ExpressionTreeNode, string> > variableNodes; vector<pair<ExpressionTreeNode, string> > variableNodes;
findExpressionsForDerivs(expr.getRootNode(), variableNodes); findExpressionsForDerivs(expr.getRootNode(), variableNodes);
for (auto& var : variables) for (auto& var : variables)
variableNodes.push_back(make_pair(ExpressionTreeNode(new Operation::Variable(var.first)), var.second)); variableNodes.push_back(make_pair(ExpressionTreeNode(new Operation::Variable(var.first)), var.second));
return cl.getExpressionUtilities().createExpressions(expressions, variableNodes, functions, functionNames, "temp"+cl.intToString(component)+"_", tempType); string result = cl.getExpressionUtilities().createExpressions(expressions, variableNodes, functions, functionNames, "temp", tempType);
if (variable == "")
result += "sum[index] = tempSum.x+tempSum.y+tempSum.z;\n";
return result;
} }
void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) { void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
...@@ -7645,6 +7637,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7645,6 +7637,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
int numAtoms = cl.getNumAtoms(); int numAtoms = cl.getNumAtoms();
int numSteps = integrator.getNumComputations(); int numSteps = integrator.getNumComputations();
bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision(); bool useDouble = cl.getUseDoublePrecision() || cl.getUseMixedPrecision();
string tempType = (useDouble ? "double3" : "float3");
string perDofType = (useDouble ? "double4" : "float4");
if (!hasInitializedKernels) { if (!hasInitializedKernels) {
hasInitializedKernels = true; hasInitializedKernels = true;
...@@ -7854,12 +7848,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7854,12 +7848,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
// Compute a per-DOF value. // Compute a per-DOF value.
stringstream compute; stringstream compute;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) { for (int i = 0; i < perDofValues.size(); i++)
const OpenCLNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i]; compute << tempType<<" perDof"<<cl.intToString(i)<<" = perDofValues"<<cl.intToString(i)<<"[index].xyz;\n";
compute << buffer.getType()<<" perDofx"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index];\n";
compute << buffer.getType()<<" perDofy"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index+1];\n";
compute << buffer.getType()<<" perDofz"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index+2];\n";
}
int numGaussian = 0, numUniform = 0; int numGaussian = 0, numUniform = 0;
for (int j = step; j < numSteps && (j == step || merged[j]); j++) { for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
numGaussian += numAtoms*usesVariable(expression[j][0], "gaussian"); numGaussian += numAtoms*usesVariable(expression[j][0], "gaussian");
...@@ -7869,8 +7859,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7869,8 +7859,7 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
compute << "float4 gaussian = gaussianValues[gaussianIndex+index];\n"; compute << "float4 gaussian = gaussianValues[gaussianIndex+index];\n";
if (numUniform > 0) if (numUniform > 0)
compute << "float4 uniform = uniformValues[uniformIndex+index];\n"; compute << "float4 uniform = uniformValues[uniformIndex+index];\n";
for (int i = 0; i < 3; i++) compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], integrator, forceName[j], energyName[j], functionList, functionNames);
compute << createPerDofComputation(stepType[j] == CustomIntegrator::ComputePerDof ? variable[j] : "", expression[j][0], i, integrator, forceName[j], energyName[j], functionList, functionNames);
if (variable[j] == "x") { if (variable[j] == "x") {
if (storePosAsDelta[j]) { if (storePosAsDelta[j]) {
if (cl.getSupportsDoublePrecision()) if (cl.getSupportsDoublePrecision())
...@@ -7884,12 +7873,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7884,12 +7873,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
else if (variable[j] == "v") else if (variable[j] == "v")
compute << "velm[index] = convert_mixed4(velocity);\n"; compute << "velm[index] = convert_mixed4(velocity);\n";
else { else {
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) { for (int i = 0; i < perDofValues.size(); i++)
const OpenCLNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i]; compute << "perDofValues"<<cl.intToString(i)<<"[index] = ("<<perDofType<<") (perDof"<<cl.intToString(i)<<", 0);\n";
compute << "perDofValues"<<cl.intToString(i+1)<<"[3*index] = perDofx"<<cl.intToString(i+1)<<";\n";
compute << "perDofValues"<<cl.intToString(i+1)<<"[3*index+1] = perDofy"<<cl.intToString(i+1)<<";\n";
compute << "perDofValues"<<cl.intToString(i+1)<<"[3*index+2] = perDofz"<<cl.intToString(i+1)<<";\n";
}
} }
if (numGaussian > 0) if (numGaussian > 0)
compute << "gaussianIndex += NUM_ATOMS;\n"; compute << "gaussianIndex += NUM_ATOMS;\n";
...@@ -7900,10 +7885,9 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7900,10 +7885,9 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_STEP"] = compute.str(); replacements["COMPUTE_STEP"] = compute.str();
stringstream args; stringstream args;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) { for (int i = 0; i < perDofValues.size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i]; string valueName = "perDofValues"+cl.intToString(i);
string valueName = "perDofValues"+cl.intToString(i+1); args << ", __global " << perDofType << "* restrict " << valueName;
args << ", __global " << buffer.getType() << "* restrict " << valueName;
} }
for (int i = 0; i < (int) tableTypes.size(); i++) for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i; args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
...@@ -7928,8 +7912,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7928,8 +7912,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
kernel.setArg<cl::Buffer>(index++, sumBuffer.getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, sumBuffer.getDeviceBuffer());
index += 4; index += 4;
kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs.getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs.getDeviceBuffer());
for (auto& buffer : perDofValues->getBuffers()) for (auto& array : perDofValues)
kernel.setArg<cl::Memory>(index++, buffer.getMemory()); kernel.setArg<cl::Memory>(index++, array.getDeviceBuffer());
for (auto& array : tabulatedFunctions) for (auto& array : tabulatedFunctions)
kernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer());
if (stepType[step] == CustomIntegrator::ComputeSum) { if (stepType[step] == CustomIntegrator::ComputeSum) {
...@@ -7986,22 +7970,16 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -7986,22 +7970,16 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
// Create the kernel for computing kinetic energy. // Create the kernel for computing kinetic energy.
stringstream computeKE; stringstream computeKE;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) { for (int i = 0; i < perDofValues.size(); i++)
const OpenCLNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i]; computeKE << tempType<<" perDof"<<cl.intToString(i)<<" = perDofValues"<<cl.intToString(i)<<"[index].xyz;\n";
computeKE << buffer.getType()<<" perDofx"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index];\n";
computeKE << buffer.getType()<<" perDofy"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index+1];\n";
computeKE << buffer.getType()<<" perDofz"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index+2];\n";
}
Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize(); Lepton::ParsedExpression keExpression = Lepton::Parser::parse(integrator.getKineticEnergyExpression()).optimize();
for (int i = 0; i < 3; i++) computeKE << createPerDofComputation("", keExpression, integrator, "f", "", functionList, functionNames);
computeKE << createPerDofComputation("", keExpression, i, integrator, "f", "", functionList, functionNames);
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_STEP"] = computeKE.str(); replacements["COMPUTE_STEP"] = computeKE.str();
stringstream args; stringstream args;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) { for (int i = 0; i < perDofValues.size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i]; string valueName = "perDofValues"+cl.intToString(i);
string valueName = "perDofValues"+cl.intToString(i+1); args << ", __global " << perDofType << "* restrict " << valueName;
args << ", __global " << buffer.getType() << "* restrict " << valueName;
} }
for (int i = 0; i < (int) tableTypes.size(); i++) for (int i = 0; i < (int) tableTypes.size(); i++)
args << ", __global const " << tableTypes[i]<< "* restrict table" << i; args << ", __global const " << tableTypes[i]<< "* restrict table" << i;
...@@ -8026,8 +8004,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -8026,8 +8004,8 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
else else
kineticEnergyKernel.setArg<cl_float>(index++, 0.0f); kineticEnergyKernel.setArg<cl_float>(index++, 0.0f);
kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs.getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, perDofEnergyParamDerivs.getDeviceBuffer());
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) for (auto& array : perDofValues)
kineticEnergyKernel.setArg<cl::Memory>(index++, perDofValues->getBuffers()[i].getMemory()); kineticEnergyKernel.setArg<cl::Memory>(index++, array.getDeviceBuffer());
for (auto& array : tabulatedFunctions) for (auto& array : tabulatedFunctions)
kineticEnergyKernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer()); kineticEnergyKernel.setArg<cl::Buffer>(index++, array.getDeviceBuffer());
keNeedsForce = usesVariable(keExpression, "f"); keNeedsForce = usesVariable(keExpression, "f");
...@@ -8049,14 +8027,16 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context ...@@ -8049,14 +8027,16 @@ void OpenCLIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context
// Make sure all values (variables, parameters, etc.) are up to date. // Make sure all values (variables, parameters, etc.) are up to date.
if (!deviceValuesAreCurrent) { for (int i = 0; i < perDofValues.size(); i++) {
if (useDouble) if (!deviceValuesAreCurrent[i]) {
perDofValues->setParameterValues(localPerDofValuesDouble); if (useDouble)
else perDofValues[i].upload(localPerDofValuesDouble[i]);
perDofValues->setParameterValues(localPerDofValuesFloat); else
deviceValuesAreCurrent = true; perDofValues[i].upload(localPerDofValuesFloat[i]);
deviceValuesAreCurrent[i] = true;
}
localValuesAreCurrent[i] = false;
} }
localValuesAreCurrent = false;
double stepSize = integrator.getStepSize(); double stepSize = integrator.getStepSize();
recordGlobalValue(stepSize, GlobalTarget(DT, dtVariableIndex), integrator); recordGlobalValue(stepSize, GlobalTarget(DT, dtVariableIndex), integrator);
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
...@@ -8388,49 +8368,46 @@ void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, c ...@@ -8388,49 +8368,46 @@ void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, c
} }
void OpenCLIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const { void OpenCLIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const {
values.resize(perDofValues->getNumObjects()/3); values.resize(perDofValues[variable].getSize());
const vector<int>& order = cl.getAtomIndex(); const vector<int>& order = cl.getAtomIndex();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
if (!localValuesAreCurrent) { if (!localValuesAreCurrent[variable]) {
perDofValues->getParameterValues(localPerDofValuesDouble); perDofValues[variable].download(localPerDofValuesDouble[variable]);
localValuesAreCurrent = true; localValuesAreCurrent[variable] = true;
}
for (int i = 0; i < (int) values.size(); i++) {
values[order[i]][0] = localPerDofValuesDouble[variable][i].x;
values[order[i]][1] = localPerDofValuesDouble[variable][i].y;
values[order[i]][2] = localPerDofValuesDouble[variable][i].z;
} }
for (int i = 0; i < (int) values.size(); i++)
for (int j = 0; j < 3; j++)
values[order[i]][j] = localPerDofValuesDouble[3*i+j][variable];
} }
else { else {
if (!localValuesAreCurrent) { if (!localValuesAreCurrent[variable]) {
perDofValues->getParameterValues(localPerDofValuesFloat); perDofValues[variable].download(localPerDofValuesFloat[variable]);
localValuesAreCurrent = true; localValuesAreCurrent[variable] = true;
}
for (int i = 0; i < (int) values.size(); i++) {
values[order[i]][0] = localPerDofValuesFloat[variable][i].x;
values[order[i]][1] = localPerDofValuesFloat[variable][i].y;
values[order[i]][2] = localPerDofValuesFloat[variable][i].z;
} }
for (int i = 0; i < (int) values.size(); i++)
for (int j = 0; j < 3; j++)
values[order[i]][j] = localPerDofValuesFloat[3*i+j][variable];
} }
} }
void OpenCLIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, int variable, const vector<Vec3>& values) { void OpenCLIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, int variable, const vector<Vec3>& values) {
const vector<int>& order = cl.getAtomIndex(); const vector<int>& order = cl.getAtomIndex();
localValuesAreCurrent[variable] = true;
deviceValuesAreCurrent[variable] = false;
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) { if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
if (!localValuesAreCurrent) { localPerDofValuesDouble[variable].resize(values.size());
perDofValues->getParameterValues(localPerDofValuesDouble);
localValuesAreCurrent = true;
}
for (int i = 0; i < (int) values.size(); i++) for (int i = 0; i < (int) values.size(); i++)
for (int j = 0; j < 3; j++) localPerDofValuesDouble[variable][i] = mm_double4(values[order[i]][0], values[order[i]][1], values[order[i]][2], 0);
localPerDofValuesDouble[3*i+j][variable] = values[order[i]][j];
} }
else { else {
if (!localValuesAreCurrent) { localPerDofValuesFloat[variable].resize(values.size());
perDofValues->getParameterValues(localPerDofValuesFloat);
localValuesAreCurrent = true;
}
for (int i = 0; i < (int) values.size(); i++) for (int i = 0; i < (int) values.size(); i++)
for (int j = 0; j < 3; j++) localPerDofValuesFloat[variable][i] = mm_float4(values[order[i]][0], values[order[i]][1], values[order[i]][2], 0);
localPerDofValuesFloat[3*i+j][variable] = (float) values[order[i]][j];
} }
deviceValuesAreCurrent = false;
} }
void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) { void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
......
...@@ -102,7 +102,6 @@ void testSingleBond() { ...@@ -102,7 +102,6 @@ void testSingleBond() {
*/ */
void testConstraints() { void testConstraints() {
const int numParticles = 8; const int numParticles = 8;
const double temp = 500.0;
System system; System system;
CustomIntegrator integrator(0.002); CustomIntegrator integrator(0.002);
integrator.addPerDofVariable("oldx", 0); integrator.addPerDofVariable("oldx", 0);
...@@ -1028,8 +1027,10 @@ void testVectorFunctions() { ...@@ -1028,8 +1027,10 @@ void testVectorFunctions() {
CustomIntegrator integrator(0.001); CustomIntegrator integrator(0.001);
integrator.addGlobalVariable("sumy", 0.0); integrator.addGlobalVariable("sumy", 0.0);
integrator.addPerDofVariable("angular", 0.0); integrator.addPerDofVariable("angular", 0.0);
integrator.addPerDofVariable("shuffle", 0.0);
integrator.addComputeSum("sumy", "x*vector(0, 1, 0)"); integrator.addComputeSum("sumy", "x*vector(0, 1, 0)");
integrator.addComputePerDof("angular", "cross(v, x)"); integrator.addComputePerDof("angular", "cross(v, x)");
integrator.addComputePerDof("shuffle", "dot(vector(_z(x), _x(x), _y(x)), v)");
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
vector<Vec3> positions(numParticles); vector<Vec3> positions(numParticles);
...@@ -1047,10 +1048,12 @@ void testVectorFunctions() { ...@@ -1047,10 +1048,12 @@ void testVectorFunctions() {
// See if the expressions were computed correctly. // See if the expressions were computed correctly.
double sumy = 0; double sumy = 0;
vector<Vec3> values; vector<Vec3> angular, shuffle;
integrator.getPerDofVariable(0, values); integrator.getPerDofVariable(0, angular);
integrator.getPerDofVariable(1, shuffle);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(velocities[i].cross(positions[i]), values[i], 1e-5); ASSERT_EQUAL_VEC(velocities[i].cross(positions[i]), angular[i], 1e-5);
ASSERT_EQUAL_VEC(Vec3(1, 1, 1)*velocities[i].dot(Vec3(positions[i][2], positions[i][0], positions[i][1])), shuffle[i], 1e-5);
sumy += positions[i][1]; sumy += positions[i][1];
} }
ASSERT_EQUAL_TOL(sumy, integrator.getGlobalVariable(0), 1e-5); ASSERT_EQUAL_TOL(sumy, integrator.getGlobalVariable(0), 1e-5);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment