Unverified Commit edbc8407 authored by peastman's avatar peastman Committed by GitHub
Browse files

Common compute framework to unify CUDA and OpenCL code (#2488)

* Began creating common compute framework to unify code between CUDA and OpenCL

* Began OpenCL implementation of common compute framework

* Common implementation of CMMotionRemover

* CUDA implementation of common compute interface

* Converted HarmonicBondForce to common compute API

* Converted standard bonded forces to common compute API

* Converted ExpressionUtilities to common compute API

* Created ComputeParameterSet

* Converted custom bonded forces to common compute API

* Converted CustomCentroidBondForce to common compute API

* Converted CustomManyParticleForce to common compute API

* Moved lots of duplicate code from CudaContext and OpenCLContext to ComputeContext

* Converted GayBerneForce to common compute API

* Removed obsolete kernels

* Converted verlet integrators to common compute API

* Converted Langevin and Brownian integrators to common compute API

* Converted CustomIntegrator to common compute API

* Converted CustomNonbondedForce to common compute API

* Removed uses of a deprecated API

* Fixed failing test cases

* Converted GBSAOBCForce to common compute API

* Began converting CustomGBForce to common compute API

* Finished converting CustomGBForce to common compute API

* Merged duplicated code in CudaIntegrationUtilities and OpenCLIntegrationUtilities

* Converted RMSDForce and AndersenThermostat to common compute API

* Converted CustomHbondForce to common compute API

* Merged scripts for encoding kernel sources

* Converted Drude plugin to common compute API

* Fixed errors in CMake scripts

* Attempt at fixing errors on Windows

* Added discussion of common compute API to developer guide

* Added Windows export macro for common classes

* Fixed error in CMMotionRemover

* Updated travis to newer Ubuntu version

* Fixed errors on CPU OpenCL

* Fixed Windows linking errors

* Added missing pragma for 32 bit atomics

* Replaced long long with mm_long

* More fixes to Windows linking

* Bug fix
parent 38beeefe
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLExpressionUtilities.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/SplineFitter.h"
#include "lepton/Operation.h"
using namespace OpenMM;
using namespace Lepton;
using namespace std;
/**
 * Create an OpenCLExpressionUtilities that generates code on behalf of the given context.
 *
 * The context reference is stored for later use (it is used below to format numbers).
 * The fp1, fp2, fp3 and periodicDistance members are constructed from the values
 * 1, 2, 3 and 6 (presumably the argument counts of placeholder functions -- confirm
 * against the class declaration).
 */
OpenCLExpressionUtilities::OpenCLExpressionUtilities(OpenCLContext& context)
        : context(context),
          fp1(1),
          fp2(2),
          fp3(3),
          periodicDistance(6) {
}
/**
 * Generate source code that evaluates a set of expressions, with the known variables
 * given as a map from variable name to the text that is emitted for that variable.
 *
 * Every (name, text) entry is wrapped in a Variable expression node, and the work is
 * then delegated to the overload that accepts a list of (node, text) pairs.
 *
 * @return the generated source code
 */
string OpenCLExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
        const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const string& tempType) {
    vector<pair<ExpressionTreeNode, string> > nodesForVariables;
    for (const auto& variable : variables) {
        ExpressionTreeNode variableNode(new Operation::Variable(variable.first));
        nodesForVariables.push_back(make_pair(variableNode, variable.second));
    }
    return createExpressions(expressions, nodesForVariables, functions, functionNames, prefix, tempType);
}
/**
 * Generate source code that evaluates a set of expressions, with the known values given
 * as a list of (expression node, text emitted for that node) pairs.
 *
 * For each entry of `expressions`, the code needed to compute its root node is written,
 * followed by one line consisting of the entry's key, the name of the temporary holding
 * the result, and ";".  The key is therefore expected to already contain the assignment
 * operator (for example "energy += ").
 *
 * @return the generated source code
 */
string OpenCLExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const vector<pair<ExpressionTreeNode, string> >& variables,
        const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const string& tempType) {
    // processExpression() searches all of the expressions for related nodes, so gather them first.
    vector<ParsedExpression> everyExpression;
    for (const auto& entry : expressions)
        everyExpression.push_back(entry.second);

    // The list of available temporaries starts out as the caller's variables and grows as code is emitted.
    vector<pair<ExpressionTreeNode, string> > knownNodes = variables;
    vector<vector<double> > functionParams = computeFunctionParameters(functions);

    stringstream generated;
    for (const auto& entry : expressions) {
        const ExpressionTreeNode& root = entry.second.getRootNode();
        processExpression(generated, root, knownNodes, functions, functionNames, prefix, functionParams, everyExpression, tempType);
        generated << entry.first << getTempName(root, knownNodes) << ";\n";
    }
    return generated.str();
}
/**
 * Write the code that computes the value of one expression node into a new temporary.
 *
 * If `temps` already contains the node nothing is written.  Otherwise the children are
 * processed first (recursively), then a declaration of the form
 * "<tempType> <prefix><index> = ...;" is written and the node is recorded in `temps`.
 * Custom functions and integer powers additionally look through `allExpressions` for
 * related nodes (other derivatives of the same function call, other integer powers of
 * the same base) and compute all of them together inside one braced block.
 *
 * @param out             stream that receives the generated code
 * @param node            the expression node to generate code for
 * @param temps           (node, name) pairs whose values are already available; new temporaries are appended
 * @param functions       the tabulated functions, indexed in parallel with functionNames
 * @param functionNames   for each function, its name in expressions (first) and the name of the array
 *                        holding its coefficients in the generated code (second)
 * @param prefix          prefix used to build the names of temporaries
 * @param functionParams  for each function, the numeric parameters inserted into the generated code
 * @param allExpressions  every expression being generated, searched for related nodes
 * @param tempType        the type used to declare temporaries
 * @throws OpenMMException on an unknown variable or function, an unsupported derivative,
 *                         or an unknown operation
 */
void OpenCLExpressionUtilities::processExpression(stringstream& out, const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, string> >& temps,
        const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const vector<vector<double> >& functionParams,
        const vector<ParsedExpression>& allExpressions, const string& tempType) {
    // Nothing to do if a temporary already holds the value of this node.
    for (int i = 0; i < (int) temps.size(); i++)
        if (temps[i].first == node)
            return;

    // The children must be available before this node can refer to them.
    for (int i = 0; i < (int) node.getChildren().size(); i++)
        processExpression(out, node.getChildren()[i], temps, functions, functionNames, prefix, functionParams, allExpressions, tempType);
    string name = prefix+context.intToString(temps.size());
    bool hasRecordedNode = false;
    bool isDouble = (tempType[0] == 'd');
    out << tempType << " " << name << " = ";
    switch (node.getOperation().getId()) {
        case Operation::CONSTANT:
            out << context.doubleToString(dynamic_cast<const Operation::Constant*>(&node.getOperation())->getValue());
            break;
        case Operation::VARIABLE:
            // Every known variable is already in temps, so reaching this point means it is undefined.
            throw OpenMMException("Unknown variable in expression: "+node.getOperation().getName());
        case Operation::CUSTOM:
        {
            out << "0.0f;\n";
            temps.push_back(make_pair(node, name));
            hasRecordedNode = true;

            // If both the value and derivative of the function are needed, it's faster to calculate them both
            // at once, so check to see if both are needed.

            vector<const ExpressionTreeNode*> nodes;
            nodes.push_back(&node);
            for (int j = 0; j < (int) allExpressions.size(); j++)
                findRelatedCustomFunctions(node, allExpressions[j].getRootNode(), nodes);
            vector<string> nodeNames;
            nodeNames.push_back(name);
            for (int j = 1; j < (int) nodes.size(); j++) {
                string name2 = prefix+context.intToString(temps.size());
                out << tempType << " " << name2 << " = 0.0f;\n";
                nodeNames.push_back(name2);
                temps.push_back(make_pair(*nodes[j], name2));
            }
            out << "{\n";
            if (node.getOperation().getName() == "periodicdistance") {
                // This is the periodicdistance() function.

                // The vector literal uses the same element type as the declaration, so the two always agree.
                out << tempType << "3 periodicDistance_delta = (" << tempType << "3) (";
                for (int i = 0; i < 3; i++) {
                    if (i > 0)
                        out << ", ";
                    out << getTempName(node.getChildren()[i], temps) << "-" << getTempName(node.getChildren()[i+3], temps);
                }
                out << ");\n";
                out << "APPLY_PERIODIC_TO_DELTA(periodicDistance_delta)\n";
                out << tempType << " periodicDistance_r2 = periodicDistance_delta.x*periodicDistance_delta.x + periodicDistance_delta.y*periodicDistance_delta.y + periodicDistance_delta.z*periodicDistance_delta.z;\n";
                out << tempType << " periodicDistance_rinv = RSQRT(periodicDistance_r2);\n";
                for (int j = 0; j < nodes.size(); j++) {
                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();

                    // Find which argument (if any) this node is the first derivative with respect to.
                    int argIndex = -1;
                    for (int k = 0; k < 6; k++) {
                        if (derivOrder[k] > 0) {
                            if (derivOrder[k] > 1 || argIndex != -1)
                                throw OpenMMException("Unsupported derivative of periodicdistance"); // Should be impossible for this to happen.
                            argIndex = k;
                        }
                    }
                    if (argIndex == -1)
                        out << nodeNames[j] << " = RECIP(periodicDistance_rinv);\n";
                    else if (argIndex == 0)
                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.x*periodicDistance_rinv : 0);\n";
                    else if (argIndex == 1)
                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.y*periodicDistance_rinv : 0);\n";
                    else if (argIndex == 2)
                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.z*periodicDistance_rinv : 0);\n";
                    else if (argIndex == 3)
                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.x*periodicDistance_rinv : 0);\n";
                    else if (argIndex == 4)
                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.y*periodicDistance_rinv : 0);\n";
                    else if (argIndex == 5)
                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.z*periodicDistance_rinv : 0);\n";
                }
            }
            else if (node.getOperation().getName() == "dot") {
                for (int j = 0; j < nodes.size(); j++) {
                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                    string child1 = getTempName(node.getChildren()[0], temps);
                    string child2 = getTempName(node.getChildren()[1], temps);
                    if (derivOrder[0] == 0 && derivOrder[1] == 0)
                        out << nodeNames[j] << " = dot(" << child1 << ", " << child2 << ");\n";
                    else
                        throw OpenMMException("Unsupported derivative order for dot()");
                }
            }
            else if (node.getOperation().getName() == "cross") {
                for (int j = 0; j < nodes.size(); j++) {
                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                    string child1 = getTempName(node.getChildren()[0], temps);
                    string child2 = getTempName(node.getChildren()[1], temps);
                    if (derivOrder[0] == 0 && derivOrder[1] == 0)
                        out << nodeNames[j] << " = cross(" << child1 << ", " << child2 << ");\n";
                    else
                        throw OpenMMException("Unsupported derivative order for cross()");
                }
            }
            else if (node.getOperation().getName() == "vector") {
                for (int j = 0; j < nodes.size(); j++) {
                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                    if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
                        out << nodeNames[j] << ".x = " << getTempName(node.getChildren()[0], temps) << ".x;\n";
                        out << nodeNames[j] << ".y = " << getTempName(node.getChildren()[1], temps) << ".y;\n";
                        out << nodeNames[j] << ".z = " << getTempName(node.getChildren()[2], temps) << ".z;\n";
                    }
                    else if (derivOrder[0] == 1 && derivOrder[1] == 0 && derivOrder[2] == 0)
                        out << nodeNames[j] << ".x = 1;\n";
                    else if (derivOrder[0] == 0 && derivOrder[1] == 1 && derivOrder[2] == 0)
                        out << nodeNames[j] << ".y = 1;\n";
                    else if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 1)
                        out << nodeNames[j] << ".z = 1;\n";
                }
            }
            else if (node.getOperation().getName() == "_x") {
                for (int j = 0; j < nodes.size(); j++) {
                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                    if (derivOrder[0] == 0)
                        out << nodeNames[j] << " = " << getTempName(node.getChildren()[0], temps) << ".x;\n";
                    else
                        throw OpenMMException("Unsupported derivative order for _x()");
                }
            }
            else if (node.getOperation().getName() == "_y") {
                for (int j = 0; j < nodes.size(); j++) {
                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                    if (derivOrder[0] == 0)
                        out << nodeNames[j] << " = " << getTempName(node.getChildren()[0], temps) << ".y;\n";
                    else
                        throw OpenMMException("Unsupported derivative order for _y()");
                }
            }
            else if (node.getOperation().getName() == "_z") {
                for (int j = 0; j < nodes.size(); j++) {
                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                    if (derivOrder[0] == 0)
                        out << nodeNames[j] << " = " << getTempName(node.getChildren()[0], temps) << ".z;\n";
                    else
                        throw OpenMMException("Unsupported derivative order for _z()");
                }
            }
            else {
                // This is a tabulated function.

                int i;
                for (i = 0; i < (int) functionNames.size() && functionNames[i].first != node.getOperation().getName(); i++)
                    ;
                if (i == functionNames.size())
                    throw OpenMMException("Unknown function in expression: "+node.getOperation().getName());

                // Each parameter is needed both as a floating point literal and as an integer literal.
                vector<string> paramsFloat, paramsInt;
                for (int j = 0; j < (int) functionParams[i].size(); j++) {
                    paramsFloat.push_back(context.doubleToString(functionParams[i][j]));
                    paramsInt.push_back(context.intToString((int) functionParams[i][j]));
                }

                // When the temporaries are three component vectors, the function is applied to each component in turn.
                vector<string> suffixes;
                if (tempType[tempType.size()-1] == '3') {
                    suffixes.push_back(".x");
                    suffixes.push_back(".y");
                    suffixes.push_back(".z");
                }
                else {
                    suffixes.push_back("");
                }
                for (auto& suffix : suffixes) {
                    out << "{\n";
                    if (dynamic_cast<const Continuous1DFunction*>(functions[i]) != NULL) {
                        out << "real x = " << getTempName(node.getChildren()[0], temps) << suffix <<";\n";
                        out << "if (x >= " << paramsFloat[0] << " && x <= " << paramsFloat[1] << ") {\n";
                        out << "x = (x - " << paramsFloat[0] << ")*" << paramsFloat[2] << ";\n";
                        out << "int index = (int) (floor(x));\n";
                        out << "index = min(index, " << paramsInt[3] << ");\n";
                        out << "float4 coeff = " << functionNames[i].second << "[index];\n";
                        out << "real b = x-index;\n";
                        out << "real a = 1.0f-b;\n";
                        for (int j = 0; j < nodes.size(); j++) {
                            const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                            if (derivOrder[0] == 0)
                                out << nodeNames[j] << suffix <<" = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(" << paramsFloat[2] << "*" << paramsFloat[2] << ");\n";
                            else
                                out << nodeNames[j] << suffix <<" = (coeff.y-coeff.x)*" << paramsFloat[2] << "+((1.0f-3.0f*a*a)*coeff.z+(3.0f*b*b-1.0f)*coeff.w)/" << paramsFloat[2] << ";\n";
                        }
                        out << "}\n";
                    }
                    else if (dynamic_cast<const Continuous2DFunction*>(functions[i]) != NULL) {
                        out << "real x = " << getTempName(node.getChildren()[0], temps) << suffix <<";\n";
                        out << "real y = " << getTempName(node.getChildren()[1], temps) << suffix <<";\n";
                        out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
                        out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
                        out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
                        out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
                        out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
                        out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
                        out << "float4 c[4];\n";
                        for (int j = 0; j < 4; j++)
                            out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
                        out << "real da = x-s;\n";
                        out << "real db = y-t;\n";
                        for (int j = 0; j < nodes.size(); j++) {
                            const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                            if (derivOrder[0] == 0 && derivOrder[1] == 0) {
                                out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;\n";
                                out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;\n";
                                out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;\n";
                                out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;\n";
                            }
                            else if (derivOrder[0] == 1 && derivOrder[1] == 0) {
                                out << nodeNames[j] << suffix << " = db*" << nodeNames[j] << suffix << " + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;\n";
                                out << nodeNames[j] << suffix << " = db*" << nodeNames[j] << suffix << " + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;\n";
                                out << nodeNames[j] << suffix << " = db*" << nodeNames[j] << suffix << " + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;\n";
                                out << nodeNames[j] << suffix << " = db*" << nodeNames[j] << suffix << " + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;\n";
                                out << nodeNames[j] << suffix << " *= " << paramsFloat[6] << ";\n";
                            }
                            else if (derivOrder[0] == 0 && derivOrder[1] == 1) {
                                out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;\n";
                                out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;\n";
                                out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;\n";
                                out << nodeNames[j] << suffix << " = da*" << nodeNames[j] << suffix << " + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;\n";
                                out << nodeNames[j] << suffix << " *= " << paramsFloat[7] << ";\n";
                            }
                            else
                                throw OpenMMException("Unsupported derivative order for Continuous2DFunction");
                        }
                        out << "}\n";
                    }
                    else if (dynamic_cast<const Continuous3DFunction*>(functions[i]) != NULL) {
                        out << "real x = " << getTempName(node.getChildren()[0], temps) << suffix <<";\n";
                        out << "real y = " << getTempName(node.getChildren()[1], temps) << suffix <<";\n";
                        out << "real z = " << getTempName(node.getChildren()[2], temps) << suffix <<";\n";
                        out << "if (x >= " << paramsFloat[3] << " && x <= " << paramsFloat[4] << " && y >= " << paramsFloat[5] << " && y <= " << paramsFloat[6] << " && z >= " << paramsFloat[7] << " && z <= " << paramsFloat[8] << ") {\n";
                        out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n";
                        out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n";
                        out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n";
                        out << "int s = min((int) floor(x), " << paramsInt[0] << "-1);\n";
                        out << "int t = min((int) floor(y), " << paramsInt[1] << "-1);\n";
                        out << "int u = min((int) floor(z), " << paramsInt[2] << "-1);\n";
                        out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n";
                        out << "float4 c[16];\n";
                        for (int j = 0; j < 16; j++)
                            out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
                        out << "real da = x-s;\n";
                        out << "real db = y-t;\n";
                        out << "real dc = z-u;\n";
                        for (int j = 0; j < nodes.size(); j++) {
                            const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                            if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
                                out << "real value[4] = {0, 0, 0, 0};\n";
                                for (int k = 3; k >= 0; k--)
                                    for (int m = 0; m < 4; m++) {
                                        int base = k + 4*m;
                                        out << "value[" << m << "] = db*value[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
                                    }
                                out << nodeNames[j] << suffix << " = value[0] + dc*(value[1] + dc*(value[2] + dc*value[3]));\n";
                            }
                            else if (derivOrder[0] == 1 && derivOrder[1] == 0 && derivOrder[2] == 0) {
                                out << "real derivx[4] = {0, 0, 0, 0};\n";
                                for (int k = 3; k >= 0; k--)
                                    for (int m = 0; m < 4; m++) {
                                        int base = k + 4*m;
                                        out << "derivx[" << m << "] = db*derivx[" << m << "] + (3*c[" << base << "].w*da + 2*c[" << base << "].z)*da + c[" << base << "].y;\n";
                                    }
                                out << nodeNames[j] << suffix << " = derivx[0] + dc*(derivx[1] + dc*(derivx[2] + dc*derivx[3]));\n";
                                out << nodeNames[j] << suffix << " *= " << paramsFloat[9] << ";\n";
                            }
                            else if (derivOrder[0] == 0 && derivOrder[1] == 1 && derivOrder[2] == 0) {
                                // These select a component of a float4 coefficient; they are unrelated to the
                                // per-component suffix of the enclosing loop, hence the distinct names.
                                const string components[] = {".x", ".y", ".z", ".w"};
                                out << "real derivy[4] = {0, 0, 0, 0};\n";
                                for (int k = 3; k >= 0; k--)
                                    for (int m = 0; m < 4; m++) {
                                        int base = 4*m;
                                        const string& component = components[k];
                                        out << "derivy[" << m << "] = da*derivy[" << m << "] + (3*c[" << (base+3) << "]" << component << "*db + 2*c[" << (base+2) << "]" << component << ")*db + c[" << (base+1) << "]" << component << ";\n";
                                    }
                                out << nodeNames[j] << suffix << " = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));\n";
                                out << nodeNames[j] << suffix << " *= " << paramsFloat[10] << ";\n";
                            }
                            else if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 1) {
                                out << "real derivz[4] = {0, 0, 0, 0};\n";
                                for (int k = 3; k >= 0; k--)
                                    for (int m = 0; m < 4; m++) {
                                        int base = k + 4*m;
                                        out << "derivz[" << m << "] = db*derivz[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
                                    }
                                out << nodeNames[j] << suffix << " = derivz[1] + dc*(2*derivz[2] + dc*3*derivz[3]);\n";
                                out << nodeNames[j] << suffix << " *= " << paramsFloat[11] << ";\n";
                            }
                            else
                                throw OpenMMException("Unsupported derivative order for Continuous3DFunction");
                        }
                        out << "}\n";
                    }
                    else if (dynamic_cast<const Discrete1DFunction*>(functions[i]) != NULL) {
                        // Only the value is written; derivative nodes keep their initial value of 0.
                        for (int j = 0; j < nodes.size(); j++) {
                            const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                            if (derivOrder[0] == 0) {
                                out << "real x = " << getTempName(node.getChildren()[0], temps) << suffix <<";\n";
                                out << "if (x >= 0 && x < " << paramsInt[0] << ") {\n";
                                out << "int index = (int) floor(x+0.5f);\n";
                                out << nodeNames[j] << suffix << " = " << functionNames[i].second << "[index];\n";
                                out << "}\n";
                            }
                        }
                    }
                    else if (dynamic_cast<const Discrete2DFunction*>(functions[i]) != NULL) {
                        for (int j = 0; j < nodes.size(); j++) {
                            const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                            if (derivOrder[0] == 0 && derivOrder[1] == 0) {
                                out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << suffix <<"+0.5f);\n";
                                out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << suffix <<"+0.5f);\n";
                                out << "int xsize = " << paramsInt[0] << ";\n";
                                out << "int ysize = " << paramsInt[1] << ";\n";
                                out << "int index = x+y*xsize;\n";
                                out << "if (index >= 0 && index < xsize*ysize)\n";
                                out << nodeNames[j] << suffix << " = " << functionNames[i].second << "[index];\n";
                            }
                        }
                    }
                    else if (dynamic_cast<const Discrete3DFunction*>(functions[i]) != NULL) {
                        for (int j = 0; j < nodes.size(); j++) {
                            const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
                            if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
                                out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << suffix <<"+0.5f);\n";
                                out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << suffix <<"+0.5f);\n";
                                out << "int z = (int) floor(" << getTempName(node.getChildren()[2], temps) << suffix <<"+0.5f);\n";
                                out << "int xsize = " << paramsInt[0] << ";\n";
                                out << "int ysize = " << paramsInt[1] << ";\n";
                                out << "int zsize = " << paramsInt[2] << ";\n";
                                out << "int index = x+(y+z*ysize)*xsize;\n";
                                out << "if (index >= 0 && index < xsize*ysize*zsize)\n";
                                out << nodeNames[j] << suffix << " = " << functionNames[i].second << "[index];\n";
                            }
                        }
                    }
                    out << "}\n";
                }
            }
            out << "}";
            break;
        }
        case Operation::ADD:
            out << getTempName(node.getChildren()[0], temps) << "+" << getTempName(node.getChildren()[1], temps);
            break;
        case Operation::SUBTRACT:
            out << getTempName(node.getChildren()[0], temps) << "-" << getTempName(node.getChildren()[1], temps);
            break;
        case Operation::MULTIPLY:
            out << getTempName(node.getChildren()[0], temps) << "*" << getTempName(node.getChildren()[1], temps);
            break;
        case Operation::DIVIDE:
        {
            // If the reciprocal of the denominator has already been computed, multiply by it instead of
            // dividing.  The search stops at the first match so that exactly one expression is written.
            bool haveReciprocal = false;
            for (int i = 0; i < (int) temps.size() && !haveReciprocal; i++)
                if (temps[i].first.getOperation().getId() == Operation::RECIPROCAL && temps[i].first.getChildren()[0] == node.getChildren()[1]) {
                    haveReciprocal = true;
                    out << getTempName(node.getChildren()[0], temps) << "*" << temps[i].second;
                }
            if (!haveReciprocal)
                out << getTempName(node.getChildren()[0], temps) << "/" << getTempName(node.getChildren()[1], temps);
            break;
        }
        case Operation::POWER:
            out << "pow((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
            break;
        case Operation::NEGATE:
            out << "-" << getTempName(node.getChildren()[0], temps);
            break;
        case Operation::SQRT:
            out << (isDouble ? "sqrt(" : "SQRT(") << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::EXP:
            out << (isDouble ? "exp(" : "EXP(") << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::LOG:
            out << (isDouble ? "log(" : "LOG(") << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::SIN:
            out << "sin(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::COS:
            out << "cos(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::SEC:
            out << "1.0f/cos(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::CSC:
            out << "1.0f/sin(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::TAN:
            out << "tan(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::COT:
            out << "1.0f/tan(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::ASIN:
            out << "asin(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::ACOS:
            out << "acos(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::ATAN:
            out << "atan(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::ATAN2:
            out << "atan2(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")";
            break;
        case Operation::SINH:
            out << "sinh(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::COSH:
            out << "cosh(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::TANH:
            out << "tanh(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::ERF:
            out << "erf(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::ERFC:
            out << "erfc(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::STEP:
            out << getTempName(node.getChildren()[0], temps) << " >= 0.0f ? (" << tempType << ") 1 : (" << tempType << ") 0";
            break;
        case Operation::DELTA:
            out << getTempName(node.getChildren()[0], temps) << " == 0.0f ? (" << tempType << ") 1 : (" << tempType << ") 0";
            break;
        case Operation::SQUARE:
        {
            string arg = getTempName(node.getChildren()[0], temps);
            out << arg << "*" << arg;
            break;
        }
        case Operation::CUBE:
        {
            string arg = getTempName(node.getChildren()[0], temps);
            out << arg << "*" << arg << "*" << arg;
            break;
        }
        case Operation::RECIPROCAL:
            out << (isDouble ? "1.0/(" : "RECIP(") << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::ADD_CONSTANT:
            out << context.doubleToString(dynamic_cast<const Operation::AddConstant*>(&node.getOperation())->getValue()) << "+" << getTempName(node.getChildren()[0], temps);
            break;
        case Operation::MULTIPLY_CONSTANT:
            out << context.doubleToString(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()) << "*" << getTempName(node.getChildren()[0], temps);
            break;
        case Operation::POWER_CONSTANT:
        {
            double exponent = dynamic_cast<const Operation::PowerConstant*>(&node.getOperation())->getValue();
            if (exponent == 0.0)
                out << "1.0f";
            else if (exponent == (int) exponent) {
                out << "0.0f;\n";
                temps.push_back(make_pair(node, name));
                hasRecordedNode = true;

                // If multiple integral powers of the same base are needed, it's faster to calculate all of them
                // at once, so check to see if others are also needed.

                map<int, const ExpressionTreeNode*> powers;
                powers[(int) exponent] = &node;
                for (int j = 0; j < (int) allExpressions.size(); j++)
                    findRelatedPowers(node, allExpressions[j].getRootNode(), powers);
                vector<int> exponents;
                vector<string> names;
                vector<bool> hasAssigned(powers.size(), false);
                exponents.push_back((int) fabs(exponent));
                names.push_back(name);
                for (auto& power : powers) {
                    if (power.first != exponent) {
                        exponents.push_back(power.first >= 0 ? power.first : -power.first);
                        string name2 = prefix+context.intToString(temps.size());
                        names.push_back(name2);
                        temps.push_back(make_pair(*power.second, name2));
                        out << tempType << " " << name2 << " = 0.0f;\n";
                    }
                }
                out << "{\n";

                // The running multiplier has the same type as the temporaries it is assigned to, so that
                // double precision values are not truncated and vector valued temporaries are supported.
                out << tempType << " multiplier = " << (exponent < 0.0 ? "1.0f/" : "") << getTempName(node.getChildren()[0], temps) << ";\n";

                // Exponentiation by squaring: walk through the bits of every exponent at once, multiplying
                // a result by the current multiplier whenever the corresponding bit is set.
                bool done = false;
                while (!done) {
                    done = true;
                    for (int i = 0; i < (int) exponents.size(); i++) {
                        if (exponents[i]%2 == 1) {
                            if (!hasAssigned[i])
                                out << names[i] << " = multiplier;\n";
                            else
                                out << names[i] << " *= multiplier;\n";
                            hasAssigned[i] = true;
                        }
                        exponents[i] >>= 1;
                        if (exponents[i] != 0)
                            done = false;
                    }
                    if (!done)
                        out << "multiplier *= multiplier;\n";
                }
                out << "}";
            }
            else
                out << "pow((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << context.doubleToString(exponent) << ")";
            break;
        }
        case Operation::MIN:
            out << "min((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
            break;
        case Operation::MAX:
            out << "max((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
            break;
        case Operation::ABS:
            out << "fabs(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::FLOOR:
            out << "floor(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::CEIL:
            out << "ceil(" << getTempName(node.getChildren()[0], temps) << ")";
            break;
        case Operation::SELECT:
            out << "(" << getTempName(node.getChildren()[0], temps) << " != 0 ? " << getTempName(node.getChildren()[1], temps) << " : " << getTempName(node.getChildren()[2], temps) << ")";
            break;
        default:
            throw OpenMMException("Internal error: Unknown operation in user-defined expression: "+node.getOperation().getName());
    }
    out << ";\n";

    // Cases that emit a multi-statement block record the node themselves before writing the block.
    if (!hasRecordedNode)
        temps.push_back(make_pair(node, name));
}
/**
 * Look up the name recorded for an expression node.
 *
 * @param node   the node whose value is wanted
 * @param temps  the (node, name) pairs recorded so far
 * @return the name paired with the first entry of temps that equals node
 * @throws OpenMMException if no entry of temps equals node
 */
string OpenCLExpressionUtilities::getTempName(const ExpressionTreeNode& node, const vector<pair<ExpressionTreeNode, string> >& temps) {
    for (const auto& temp : temps) {
        if (temp.first == node)
            return temp.second;
    }
    stringstream message;
    message << "Internal error: No temporary variable for expression node: " << node;
    throw OpenMMException(message.str());
}
void OpenCLExpressionUtilities::findRelatedCustomFunctions(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode,
vector<const Lepton::ExpressionTreeNode*>& nodes) {
if (searchNode.getOperation().getId() == Operation::CUSTOM && node.getOperation().getName() == searchNode.getOperation().getName()) {
// Make sure the arguments are identical.
for (int i = 0; i < (int) node.getChildren().size(); i++)
if (node.getChildren()[i] != searchNode.getChildren()[i])
return;
// See if we already have an identical node.
for (int i = 0; i < (int) nodes.size(); i++)
if (*nodes[i] == searchNode)
return;
// Add the node.
nodes.push_back(&searchNode);
}
else
for (int i = 0; i < (int) searchNode.getChildren().size(); i++)
findRelatedCustomFunctions(node, searchNode.getChildren()[i], nodes);
}
/**
 * Search an expression tree for nodes that raise the same base as a given node to a
 * constant integer power, and record them in a map keyed by exponent.
 *
 * A node is recorded only if its exponent is a nonzero integer, is not yet in the map,
 * and has the same sign as the exponents already recorded.  The search does not descend
 * below a constant power node whose base matches.
 *
 * @param node        the power node whose base (first child) is being matched
 * @param searchNode  the root of the (sub)tree to search
 * @param powers      map from exponent to the node computing that power; updated in place
 */
void OpenCLExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode, map<int, const ExpressionTreeNode*>& powers) {
    if (searchNode.getOperation().getId() == Operation::POWER_CONSTANT && node.getChildren()[0] == searchNode.getChildren()[0]) {
        double realPower = dynamic_cast<const Operation::PowerConstant*>(&searchNode.getOperation())->getValue();
        int power = (int) realPower;
        if (power != realPower)
            return; // We are only interested in integer powers.
        if (power == 0)
            return; // A zero exponent has no set bits, so the shared multiplier chain would never assign it.
        if (powers.find(power) != powers.end())
            return; // This power is already in the map.
        // Compare signs directly: multiplying the two exponents could overflow.
        if (!powers.empty() && (powers.begin()->first < 0) != (power < 0))
            return; // All powers must have the same sign.
        powers[power] = &searchNode;
    }
    else
        for (int i = 0; i < (int) searchNode.getChildren().size(); i++)
            findRelatedPowers(node, searchNode.getChildren()[i], powers);
}
// Copy a list of double precision values into a single precision array of the same length.
static std::vector<float> toSinglePrecision(const std::vector<double>& values) {
    int numValues = values.size();
    std::vector<float> result(numValues);
    for (int i = 0; i < numValues; i++)
        result[i] = (float) values[i];
    return result;
}

// Pack spline coefficients (blockSize values taken from each entry of c) into one flat single precision array.
static std::vector<float> flattenSplineCoefficients(const std::vector<std::vector<double> >& c, int blockSize) {
    std::vector<float> result(blockSize*c.size());
    for (int i = 0; i < (int) c.size(); i++) {
        for (int j = 0; j < blockSize; j++)
            result[blockSize*i+j] = (float) c[i][j];
    }
    return result;
}

/**
 * Build the array of single precision values that represents a tabulated function.
 *
 * For continuous functions the values are natural spline coefficients computed by
 * SplineFitter over evenly spaced sample points; for discrete functions they are
 * simply the tabulated values.
 *
 * @param function  the function to convert
 * @param width     on exit, set to 4 for continuous functions and 1 for discrete functions
 * @return the flattened coefficient array
 * @throws OpenMMException if the function is not one of the supported types
 */
vector<float> OpenCLExpressionUtilities::computeFunctionCoefficients(const TabulatedFunction& function, int& width) {
    const Continuous1DFunction* continuous1D = dynamic_cast<const Continuous1DFunction*>(&function);
    if (continuous1D != NULL) {
        // Fit a natural spline through the evenly spaced samples.
        vector<double> values;
        double min, max;
        continuous1D->getFunctionParameters(values, min, max);
        int numValues = values.size();
        vector<double> x(numValues), derivs;
        for (int i = 0; i < numValues; i++)
            x[i] = min+i*(max-min)/(numValues-1);
        SplineFitter::createNaturalSpline(x, values, derivs);

        // Each interval stores the values at its two ends followed by the two derivs entries divided by 6.
        int numIntervals = numValues-1;
        vector<float> f(4*numIntervals);
        for (int k = 0; k < numIntervals; k++) {
            int base = 4*k;
            f[base] = (float) values[k];
            f[base+1] = (float) values[k+1];
            f[base+2] = (float) (derivs[k]/6.0);
            f[base+3] = (float) (derivs[k+1]/6.0);
        }
        width = 4;
        return f;
    }

    const Continuous2DFunction* continuous2D = dynamic_cast<const Continuous2DFunction*>(&function);
    if (continuous2D != NULL) {
        // Fit a 2D natural spline: 16 coefficients are copied from each entry.
        vector<double> values;
        int xsize, ysize;
        double xmin, xmax, ymin, ymax;
        continuous2D->getFunctionParameters(xsize, ysize, values, xmin, xmax, ymin, ymax);
        vector<double> x(xsize), y(ysize);
        for (int i = 0; i < xsize; i++)
            x[i] = xmin+i*(xmax-xmin)/(xsize-1);
        for (int i = 0; i < ysize; i++)
            y[i] = ymin+i*(ymax-ymin)/(ysize-1);
        vector<vector<double> > c;
        SplineFitter::create2DNaturalSpline(x, y, values, c);
        vector<float> f = flattenSplineCoefficients(c, 16);
        width = 4;
        return f;
    }

    const Continuous3DFunction* continuous3D = dynamic_cast<const Continuous3DFunction*>(&function);
    if (continuous3D != NULL) {
        // Fit a 3D natural spline: 64 coefficients are copied from each entry.
        vector<double> values;
        int xsize, ysize, zsize;
        double xmin, xmax, ymin, ymax, zmin, zmax;
        continuous3D->getFunctionParameters(xsize, ysize, zsize, values, xmin, xmax, ymin, ymax, zmin, zmax);
        vector<double> x(xsize), y(ysize), z(zsize);
        for (int i = 0; i < xsize; i++)
            x[i] = xmin+i*(xmax-xmin)/(xsize-1);
        for (int i = 0; i < ysize; i++)
            y[i] = ymin+i*(ymax-ymin)/(ysize-1);
        for (int i = 0; i < zsize; i++)
            z[i] = zmin+i*(zmax-zmin)/(zsize-1);
        vector<vector<double> > c;
        SplineFitter::create3DNaturalSpline(x, y, z, values, c);
        vector<float> f = flattenSplineCoefficients(c, 64);
        width = 4;
        return f;
    }

    // The discrete functions just record their tabulated values.
    const Discrete1DFunction* discrete1D = dynamic_cast<const Discrete1DFunction*>(&function);
    if (discrete1D != NULL) {
        vector<double> values;
        discrete1D->getFunctionParameters(values);
        vector<float> f = toSinglePrecision(values);
        width = 1;
        return f;
    }

    const Discrete2DFunction* discrete2D = dynamic_cast<const Discrete2DFunction*>(&function);
    if (discrete2D != NULL) {
        int xsize, ysize;
        vector<double> values;
        discrete2D->getFunctionParameters(xsize, ysize, values);
        vector<float> f = toSinglePrecision(values);
        width = 1;
        return f;
    }

    const Discrete3DFunction* discrete3D = dynamic_cast<const Discrete3DFunction*>(&function);
    if (discrete3D != NULL) {
        int xsize, ysize, zsize;
        vector<double> values;
        discrete3D->getFunctionParameters(xsize, ysize, zsize, values);
        vector<float> f = toSinglePrecision(values);
        width = 1;
        return f;
    }

    throw OpenMMException("computeFunctionCoefficients: Unknown function type");
}
/**
 * Compute the list of numeric parameters that describes each tabulated function.
 *
 * The entries produced for each function type are, in order:
 *   Continuous1D: min, max, (numValues-1)/(max-min), numValues-2
 *   Continuous2D: xsize-1, ysize-1, xmin, xmax, ymin, ymax, and the x and y scale factors (size-1)/(max-min)
 *   Continuous3D: xsize-1, ysize-1, zsize-1, xmin, xmax, ymin, ymax, zmin, zmax, and the x, y and z scale factors
 *   Discrete1D:   numValues
 *   Discrete2D:   xsize, ysize
 *   Discrete3D:   xsize, ysize, zsize
 *
 * @param functions  the functions to describe
 * @return one parameter list per function, in the same order as the input
 * @throws OpenMMException if a function is not one of the supported types
 */
vector<vector<double> > OpenCLExpressionUtilities::computeFunctionParameters(const vector<const TabulatedFunction*>& functions) {
    vector<vector<double> > params(functions.size());
    for (int index = 0; index < (int) functions.size(); index++) {
        const TabulatedFunction* function = functions[index];
        vector<double>& row = params[index];

        const Continuous1DFunction* continuous1D = dynamic_cast<const Continuous1DFunction*>(function);
        if (continuous1D != NULL) {
            vector<double> values;
            double min, max;
            continuous1D->getFunctionParameters(values, min, max);
            row.push_back(min);
            row.push_back(max);
            row.push_back((values.size()-1)/(max-min));
            row.push_back(values.size()-2);
            continue;
        }

        const Continuous2DFunction* continuous2D = dynamic_cast<const Continuous2DFunction*>(function);
        if (continuous2D != NULL) {
            vector<double> values;
            int xsize, ysize;
            double xmin, xmax, ymin, ymax;
            continuous2D->getFunctionParameters(xsize, ysize, values, xmin, xmax, ymin, ymax);
            row.push_back(xsize-1);
            row.push_back(ysize-1);
            row.push_back(xmin);
            row.push_back(xmax);
            row.push_back(ymin);
            row.push_back(ymax);
            row.push_back((xsize-1)/(xmax-xmin));
            row.push_back((ysize-1)/(ymax-ymin));
            continue;
        }

        const Continuous3DFunction* continuous3D = dynamic_cast<const Continuous3DFunction*>(function);
        if (continuous3D != NULL) {
            vector<double> values;
            int xsize, ysize, zsize;
            double xmin, xmax, ymin, ymax, zmin, zmax;
            continuous3D->getFunctionParameters(xsize, ysize, zsize, values, xmin, xmax, ymin, ymax, zmin, zmax);
            row.push_back(xsize-1);
            row.push_back(ysize-1);
            row.push_back(zsize-1);
            row.push_back(xmin);
            row.push_back(xmax);
            row.push_back(ymin);
            row.push_back(ymax);
            row.push_back(zmin);
            row.push_back(zmax);
            row.push_back((xsize-1)/(xmax-xmin));
            row.push_back((ysize-1)/(ymax-ymin));
            row.push_back((zsize-1)/(zmax-zmin));
            continue;
        }

        const Discrete1DFunction* discrete1D = dynamic_cast<const Discrete1DFunction*>(function);
        if (discrete1D != NULL) {
            vector<double> values;
            discrete1D->getFunctionParameters(values);
            row.push_back(values.size());
            continue;
        }

        const Discrete2DFunction* discrete2D = dynamic_cast<const Discrete2DFunction*>(function);
        if (discrete2D != NULL) {
            int xsize, ysize;
            vector<double> values;
            discrete2D->getFunctionParameters(xsize, ysize, values);
            row.push_back(xsize);
            row.push_back(ysize);
            continue;
        }

        const Discrete3DFunction* discrete3D = dynamic_cast<const Discrete3DFunction*>(function);
        if (discrete3D != NULL) {
            int xsize, ysize, zsize;
            vector<double> values;
            discrete3D->getFunctionParameters(xsize, ysize, zsize, values);
            row.push_back(xsize);
            row.push_back(ysize);
            row.push_back(zsize);
            continue;
        }

        throw OpenMMException("computeFunctionParameters: Unknown function type");
    }
    return params;
}
/**
 * Get the placeholder custom function to use for a tabulated function.  The choice
 * depends only on the number of arguments: fp1, fp2 and fp3 are returned for 1D, 2D
 * and 3D functions respectively, whether continuous or discrete.
 *
 * @param function  the tabulated function to look up a placeholder for
 * @return a pointer to the matching placeholder member
 * @throws OpenMMException if the function is not one of the supported types
 */
Lepton::CustomFunction* OpenCLExpressionUtilities::getFunctionPlaceholder(const TabulatedFunction& function) {
    const TabulatedFunction* fn = &function;
    bool is1D = (dynamic_cast<const Continuous1DFunction*>(fn) != NULL || dynamic_cast<const Discrete1DFunction*>(fn) != NULL);
    if (is1D)
        return &fp1;
    bool is2D = (dynamic_cast<const Continuous2DFunction*>(fn) != NULL || dynamic_cast<const Discrete2DFunction*>(fn) != NULL);
    if (is2D)
        return &fp2;
    bool is3D = (dynamic_cast<const Continuous3DFunction*>(fn) != NULL || dynamic_cast<const Discrete3DFunction*>(fn) != NULL);
    if (is3D)
        return &fp3;
    throw OpenMMException("getFunctionPlaceholder: Unknown function type");
}
/**
 * Get the placeholder custom function stored in the periodicDistance member.
 *
 * @return a pointer to the periodicDistance member of this object
 */
Lepton::CustomFunction* OpenCLExpressionUtilities::getPeriodicDistancePlaceholder() {
    Lepton::CustomFunction* placeholder = &periodicDistance;
    return placeholder;
}
......@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */
#include "OpenCLFFT3D.h"
#include "OpenCLContext.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLKernelSources.h"
#include "SimTKOpenMMRealType.h"
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -25,738 +25,90 @@
* -------------------------------------------------------------------------- */
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLKernelSources.h"
#include "openmm/internal/OSRngSeed.h"
#include "openmm/HarmonicAngleForce.h"
#include "openmm/VirtualSite.h"
#include "quern.h"
#include "OpenCLExpressionUtilities.h"
#include "ReferenceCCMAAlgorithm.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <map>
#include "OpenCLContext.h"
#include <cl.hpp>
using namespace OpenMM;
using namespace std;
/**
 * A candidate SHAKE cluster: one central atom plus up to three peripheral atoms, where
 * every constraint has the same length and every peripheral atom has the same inverse mass.
 * A cluster that cannot satisfy those conditions is flagged as invalid instead.
 */
struct OpenCLIntegrationUtilities::ShakeCluster {
    int centralID;
    int peripheralID[3];
    int size;               // number of peripheral atoms recorded so far
    bool valid;
    double distance;        // constraint distance of the most recently added atom
    double centralInvMass, peripheralInvMass;
    // Every member is given a defined value (in declaration order) so that a cluster
    // can be copied safely before any atoms have been added to it.
    ShakeCluster() : centralID(-1), size(0), valid(true), distance(0.0), centralInvMass(0.0), peripheralInvMass(0.0) {
        peripheralID[0] = peripheralID[1] = peripheralID[2] = -1;
    }
    ShakeCluster(int centralID, double invMass) : centralID(centralID), size(0), valid(true), distance(0.0), centralInvMass(invMass), peripheralInvMass(0.0) {
        peripheralID[0] = peripheralID[1] = peripheralID[2] = -1;
    }
    /**
     * Add a peripheral atom.  The cluster is marked invalid (and the atom is not added)
     * if it already holds three atoms, or if the distance or inverse mass differs from
     * the values already recorded by a relative amount of more than 1e-8.
     */
    void addAtom(int id, double dist, double invMass) {
        // std::fabs always selects a floating point overload, whatever headers are in scope.
        if (size == 3 || (size > 0 && std::fabs(dist-distance)/distance > 1e-8) || (size > 0 && std::fabs(invMass-peripheralInvMass)/peripheralInvMass > 1e-8))
            valid = false;
        else {
            peripheralID[size++] = id;
            distance = dist;
            peripheralInvMass = invMass;
        }
    }
    /**
     * Mark this cluster invalid, flag all of its atoms in invalidForShake, and do the same
     * recursively for any still-valid cluster whose central atom is one of this cluster's
     * peripheral atoms.
     */
    void markInvalid(map<int, ShakeCluster>& allClusters, vector<bool>& invalidForShake)
    {
        // Clear the flag before recursing so that a cycle of clusters cannot recurse forever.
        valid = false;
        invalidForShake[centralID] = true;
        for (int i = 0; i < size; i++) {
            invalidForShake[peripheralID[i]] = true;
            map<int, ShakeCluster>::iterator otherCluster = allClusters.find(peripheralID[i]);
            if (otherCluster != allClusters.end() && otherCluster->second.valid)
                otherCluster->second.markInvalid(allClusters, invalidForShake);
        }
    }
};
struct OpenCLIntegrationUtilities::ConstraintOrderer : public binary_function<int, int, bool> {
const vector<int>& atom1;
const vector<int>& atom2;
const vector<int>& constraints;
ConstraintOrderer(const vector<int>& atom1, const vector<int>& atom2, const vector<int>& constraints) : atom1(atom1), atom2(atom2), constraints(constraints) {
}
bool operator()(int x, int y) {
int ix = constraints[x];
int iy = constraints[y];
if (atom1[ix] != atom1[iy])
return atom1[ix] < atom1[iy];
return atom2[ix] < atom2[iy];
}
};
/**
 * Set the kernel argument at the given index to the context's posqCorrection device
 * buffer when mixed precision is in use, or to a NULL pointer otherwise.
 *
 * @param cl      the context to query
 * @param kernel  the kernel whose argument should be set
 * @param index   the index of the argument to set
 */
static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    if (!cl.getUseMixedPrecision()) {
        // There is no correction buffer to bind in this mode.
        kernel.setArg<void*>(index, NULL);
        return;
    }
    kernel.setArg<cl::Buffer>(index, cl.getPosqCorrection().getDeviceBuffer());
}
OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : context(context),
randomPos(0), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false), hasOverlappingVsites(false) {
// Create workspace arrays.
lastStepSize = mm_double2(0.0, 0.0);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
posDelta.initialize<mm_double4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<mm_double4> deltas(posDelta.getSize(), mm_double4(0.0, 0.0, 0.0, 0.0));
posDelta.upload(deltas);
stepSize.initialize<mm_double2>(context, 1, "stepSize");
stepSize.upload(&lastStepSize);
}
else {
posDelta.initialize<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<mm_float4> deltas(posDelta.getSize(), mm_float4(0.0f, 0.0f, 0.0f, 0.0f));
posDelta.upload(deltas);
stepSize.initialize<mm_float2>(context, 1, "stepSize");
mm_float2 lastStepSizeFloat = mm_float2(0.0f, 0.0f);
stepSize.upload(&lastStepSizeFloat);
}
// Create the time shift kernel for calculating kinetic energy.
map<string, string> timeShiftDefines;
timeShiftDefines["NUM_ATOMS"] = context.intToString(system.getNumParticles());
cl::Program utilitiesProgram = context.createProgram(OpenCLKernelSources::integrationUtilities, timeShiftDefines);
timeShiftKernel = cl::Kernel(utilitiesProgram, "timeShiftVelocities");
// Create kernels for enforcing constraints.
map<string, string> velocityDefines;
velocityDefines["CONSTRAIN_VELOCITIES"] = "1";
cl::Program settleProgram = context.createProgram(OpenCLKernelSources::settle);
settlePosKernel = cl::Kernel(settleProgram, "applySettle");
settleVelKernel = cl::Kernel(settleProgram, "constrainVelocities");
cl::Program shakeProgram = context.createProgram(OpenCLKernelSources::shakeHydrogens);
shakePosKernel = cl::Kernel(shakeProgram, "applyShakeToHydrogens");
shakeProgram = context.createProgram(OpenCLKernelSources::shakeHydrogens, velocityDefines);
shakeVelKernel = cl::Kernel(shakeProgram, "applyShakeToHydrogens");
// Record the set of constraints and how many constraints each atom is involved in.
vector<int> atom1;
vector<int> atom2;
vector<double> distance;
vector<int> constraintCount(context.getNumAtoms(), 0);
for (int i = 0; i < system.getNumConstraints(); i++) {
int p1, p2;
double d;
system.getConstraintParameters(i, p1, p2, d);
if (system.getParticleMass(p1) != 0 || system.getParticleMass(p2) != 0) {
atom1.push_back(p1);
atom2.push_back(p2);
distance.push_back(d);
constraintCount[p1]++;
constraintCount[p2]++;
}
}
// Identify clusters of three atoms that can be treated with SETTLE. First, for every
// atom that might be part of such a cluster, make a list of the two other atoms it is
// connected to.
int numAtoms = system.getNumParticles();
vector<map<int, float> > settleConstraints(numAtoms);
for (int i = 0; i < (int)atom1.size(); i++) {
if (constraintCount[atom1[i]] == 2 && constraintCount[atom2[i]] == 2) {
settleConstraints[atom1[i]][atom2[i]] = (float) distance[i];
settleConstraints[atom2[i]][atom1[i]] = (float) distance[i];
}
}
// Now remove the ones that don't actually form closed loops of three atoms.
vector<int> settleClusters;
for (int i = 0; i < (int)settleConstraints.size(); i++) {
if (settleConstraints[i].size() == 2) {
int partner1 = settleConstraints[i].begin()->first;
int partner2 = (++settleConstraints[i].begin())->first;
if (settleConstraints[partner1].size() != 2 || settleConstraints[partner2].size() != 2 ||
settleConstraints[partner1].find(partner2) == settleConstraints[partner1].end())
settleConstraints[i].clear();
else if (i < partner1 && i < partner2)
settleClusters.push_back(i);
}
else
settleConstraints[i].clear();
}
// Record the SETTLE clusters.
vector<bool> isShakeAtom(numAtoms, false);
if (settleClusters.size() > 0) {
vector<mm_int4> atoms;
vector<mm_float2> params;
for (int i = 0; i < (int) settleClusters.size(); i++) {
int atom1 = settleClusters[i];
int atom2 = settleConstraints[atom1].begin()->first;
int atom3 = (++settleConstraints[atom1].begin())->first;
float dist12 = settleConstraints[atom1].find(atom2)->second;
float dist13 = settleConstraints[atom1].find(atom3)->second;
float dist23 = settleConstraints[atom2].find(atom3)->second;
if (dist12 == dist13) {
// atom1 is the central atom
atoms.push_back(mm_int4(atom1, atom2, atom3, 0));
params.push_back(mm_float2(dist12, dist23));
}
else if (dist12 == dist23) {
// atom2 is the central atom
atoms.push_back(mm_int4(atom2, atom1, atom3, 0));
params.push_back(mm_float2(dist12, dist13));
}
else if (dist13 == dist23) {
// atom3 is the central atom
atoms.push_back(mm_int4(atom3, atom1, atom2, 0));
params.push_back(mm_float2(dist13, dist12));
}
else
continue; // We can't handle this with SETTLE
isShakeAtom[atom1] = true;
isShakeAtom[atom2] = true;
isShakeAtom[atom3] = true;
}
if (atoms.size() > 0) {
settleAtoms.initialize<mm_int4>(context, atoms.size(), "settleAtoms");
settleParams.initialize<mm_float2>(context, params.size(), "settleParams");
settleAtoms.upload(atoms);
settleParams.upload(params);
}
}
// Find clusters consisting of a central atom with up to three peripheral atoms.
map<int, ShakeCluster> clusters;
vector<bool> invalidForShake(numAtoms, false);
for (int i = 0; i < (int) atom1.size(); i++) {
if (isShakeAtom[atom1[i]])
continue; // This is being taken care of with SETTLE.
// Determine which is the central atom.
bool firstIsCentral;
if (constraintCount[atom1[i]] > 1)
firstIsCentral = true;
else if (constraintCount[atom2[i]] > 1)
firstIsCentral = false;
else if (atom1[i] < atom2[i])
firstIsCentral = true;
else
firstIsCentral = false;
int centralID, peripheralID;
if (firstIsCentral) {
centralID = atom1[i];
peripheralID = atom2[i];
}
else {
centralID = atom2[i];
peripheralID = atom1[i];
}
// Add it to the cluster.
if (clusters.find(centralID) == clusters.end()) {
clusters[centralID] = ShakeCluster(centralID, 1.0/system.getParticleMass(centralID));
}
ShakeCluster& cluster = clusters[centralID];
cluster.addAtom(peripheralID, distance[i], 1.0/system.getParticleMass(peripheralID));
if (constraintCount[peripheralID] != 1 || invalidForShake[atom1[i]] || invalidForShake[atom2[i]]) {
cluster.markInvalid(clusters, invalidForShake);
map<int, ShakeCluster>::iterator otherCluster = clusters.find(peripheralID);
if (otherCluster != clusters.end() && otherCluster->second.valid)
otherCluster->second.markInvalid(clusters, invalidForShake);
}
}
int validShakeClusters = 0;
for (map<int, ShakeCluster>::iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
ShakeCluster& cluster = iter->second;
if (cluster.valid) {
cluster.valid = !invalidForShake[cluster.centralID] && cluster.size == constraintCount[cluster.centralID];
for (int i = 0; i < cluster.size; i++)
if (invalidForShake[cluster.peripheralID[i]])
cluster.valid = false;
if (cluster.valid)
++validShakeClusters;
}
}
// Record the SHAKE clusters.
if (validShakeClusters > 0) {
vector<mm_int4> atoms;
vector<mm_float4> params;
int index = 0;
for (map<int, ShakeCluster>::const_iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
const ShakeCluster& cluster = iter->second;
if (!cluster.valid)
continue;
atoms.push_back(mm_int4(cluster.centralID, cluster.peripheralID[0], (cluster.size > 1 ? cluster.peripheralID[1] : -1), (cluster.size > 2 ? cluster.peripheralID[2] : -1)));
params.push_back(mm_float4((cl_float) cluster.centralInvMass, (cl_float) (0.5/(cluster.centralInvMass+cluster.peripheralInvMass)), (cl_float) (cluster.distance*cluster.distance), (cl_float) cluster.peripheralInvMass));
isShakeAtom[cluster.centralID] = true;
isShakeAtom[cluster.peripheralID[0]] = true;
if (cluster.size > 1)
isShakeAtom[cluster.peripheralID[1]] = true;
if (cluster.size > 2)
isShakeAtom[cluster.peripheralID[2]] = true;
++index;
}
shakeAtoms.initialize<mm_int4>(context, atoms.size(), "shakeAtoms");
shakeParams.initialize<mm_float4>(context, params.size(), "shakeParams");
shakeAtoms.upload(atoms);
shakeParams.upload(params);
}
// Find connected constraints for CCMA.
vector<int> ccmaConstraints;
for (unsigned i = 0; i < atom1.size(); i++)
if (!isShakeAtom[atom1[i]])
ccmaConstraints.push_back(i);
// Record the connections between constraints.
int numCCMA = (int) ccmaConstraints.size();
if (numCCMA > 0) {
// Record information needed by ReferenceCCMAAlgorithm.
vector<pair<int, int> > refIndices(numCCMA);
vector<double> refDistance(numCCMA);
for (int i = 0; i < numCCMA; i++) {
int index = ccmaConstraints[i];
refIndices[i] = make_pair(atom1[index], atom2[index]);
refDistance[i] = distance[index];
}
vector<double> refMasses(numAtoms);
for (int i = 0; i < numAtoms; ++i)
refMasses[i] = (double) system.getParticleMass(i);
// Look up angles for CCMA.
vector<ReferenceCCMAAlgorithm::AngleInfo> angles;
for (int i = 0; i < system.getNumForces(); i++) {
const HarmonicAngleForce* force = dynamic_cast<const HarmonicAngleForce*>(&system.getForce(i));
if (force != NULL) {
for (int j = 0; j < force->getNumAngles(); j++) {
int atom1, atom2, atom3;
double angle, k;
force->getAngleParameters(j, atom1, atom2, atom3, angle, k);
angles.push_back(ReferenceCCMAAlgorithm::AngleInfo(atom1, atom2, atom3, angle));
}
}
}
// Create a ReferenceCCMAAlgorithm. It will build and invert the constraint matrix for us.
ReferenceCCMAAlgorithm ccma(numAtoms, numCCMA, refIndices, refDistance, refMasses, angles, 0.1);
vector<vector<pair<int, double> > > matrix = ccma.getMatrix();
int maxRowElements = 0;
for (unsigned i = 0; i < matrix.size(); i++)
maxRowElements = max(maxRowElements, (int) matrix[i].size());
maxRowElements++;
// Build the list of constraints for each atom.
vector<vector<int> > atomConstraints(context.getNumAtoms());
for (int i = 0; i < numCCMA; i++) {
atomConstraints[atom1[ccmaConstraints[i]]].push_back(i);
atomConstraints[atom2[ccmaConstraints[i]]].push_back(i);
}
int maxAtomConstraints = 0;
for (unsigned i = 0; i < atomConstraints.size(); i++)
maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size());
// Sort the constraints.
vector<int> constraintOrder(numCCMA);
for (int i = 0; i < numCCMA; ++i)
constraintOrder[i] = i;
sort(constraintOrder.begin(), constraintOrder.end(), ConstraintOrderer(atom1, atom2, ccmaConstraints));
vector<int> inverseOrder(numCCMA);
for (int i = 0; i < numCCMA; ++i)
inverseOrder[constraintOrder[i]] = i;
for (int i = 0; i < (int)matrix.size(); ++i)
for (int j = 0; j < (int)matrix[i].size(); ++j)
matrix[i][j].first = inverseOrder[matrix[i][j].first];
// Record the CCMA data structures.
ccmaAtoms.initialize<mm_int2>(context, numCCMA, "CcmaAtoms");
ccmaAtomConstraints.initialize<cl_int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints.initialize<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaConstraintMatrixColumn.initialize<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConverged.initialize<cl_int>(context, 2, "CcmaConverged");
OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : IntegrationUtilities(context, system) {
ccmaConvergedHostBuffer.initialize<cl_int>(context, 1, "CcmaConvergedHostBuffer", CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR);
// Different communication mechanisms give optimal performance on AMD and on NVIDIA.
string vendor = context.getDevice().getInfo<CL_DEVICE_VENDOR>();
ccmaUseDirectBuffer = (vendor.size() >= 28 && vendor.substr(0, 28) == "Advanced Micro Devices, Inc.");
vector<mm_int2> atomsVec(ccmaAtoms.getSize());
vector<cl_int> atomConstraintsVec(ccmaAtomConstraints.getSize());
vector<cl_int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
vector<cl_int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
int elementSize = (context.getUseDoublePrecision() || context.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float));
ccmaDistance.initialize(context, numCCMA, 4*elementSize, "CcmaDistance");
ccmaDelta1.initialize(context, numCCMA, elementSize, "CcmaDelta1");
ccmaDelta2.initialize(context, numCCMA, elementSize, "CcmaDelta2");
ccmaReducedMass.initialize(context, numCCMA, elementSize, "CcmaReducedMass");
ccmaConstraintMatrixValue.initialize(context, numCCMA*maxRowElements, elementSize, "ConstraintMatrixValue");
vector<mm_double4> distanceVec(ccmaDistance.getSize());
vector<cl_double> reducedMassVec(ccmaReducedMass.getSize());
vector<cl_double> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
for (int i = 0; i < numCCMA; i++) {
int index = constraintOrder[i];
int c = ccmaConstraints[index];
atomsVec[i].x = atom1[c];
atomsVec[i].y = atom2[c];
distanceVec[i].w = distance[c];
reducedMassVec[i] = (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
for (unsigned int j = 0; j < matrix[index].size(); j++) {
constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
constraintMatrixValueVec[i+j*numCCMA] = matrix[index][j].second;
}
constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
}
for (unsigned int i = 0; i < atomConstraints.size(); i++) {
numAtomConstraintsVec[i] = atomConstraints[i].size();
for (unsigned int j = 0; j < atomConstraints[i].size(); j++) {
bool forward = (atom1[ccmaConstraints[atomConstraints[i][j]]] == i);
atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
}
}
ccmaDistance.upload(distanceVec, true, true);
ccmaReducedMass.upload(reducedMassVec, true, true);
ccmaConstraintMatrixValue.upload(constraintMatrixValueVec, true, true);
ccmaAtoms.upload(atomsVec);
ccmaAtomConstraints.upload(atomConstraintsVec);
ccmaNumAtomConstraints.upload(numAtomConstraintsVec);
ccmaConstraintMatrixColumn.upload(constraintMatrixColumnVec);
// Create the CCMA kernels.
map<string, string> defines;
defines["NUM_CONSTRAINTS"] = context.intToString(numCCMA);
defines["NUM_ATOMS"] = context.intToString(numAtoms);
cl::Program ccmaProgram = context.createProgram(OpenCLKernelSources::ccma, defines);
ccmaDirectionsKernel = cl::Kernel(ccmaProgram, "computeConstraintDirections");
ccmaPosForceKernel = cl::Kernel(ccmaProgram, "computeConstraintForce");
ccmaMultiplyKernel = cl::Kernel(ccmaProgram, "multiplyByConstraintMatrix");
ccmaPosUpdateKernel = cl::Kernel(ccmaProgram, "updateAtomPositions");
defines["CONSTRAIN_VELOCITIES"] = "1";
ccmaProgram = context.createProgram(OpenCLKernelSources::ccma, defines);
ccmaVelForceKernel = cl::Kernel(ccmaProgram, "computeConstraintForce");
ccmaVelUpdateKernel = cl::Kernel(ccmaProgram, "updateAtomPositions");
}
// Build the list of virtual sites.
vector<mm_int4> vsite2AvgAtomVec;
vector<mm_double2> vsite2AvgWeightVec;
vector<mm_int4> vsite3AvgAtomVec;
vector<mm_double4> vsite3AvgWeightVec;
vector<mm_int4> vsiteOutOfPlaneAtomVec;
vector<mm_double4> vsiteOutOfPlaneWeightVec;
vector<cl_int> vsiteLocalCoordsIndexVec;
vector<cl_int> vsiteLocalCoordsAtomVec;
vector<cl_int> vsiteLocalCoordsStartVec;
vector<cl_double> vsiteLocalCoordsWeightVec;
vector<mm_double4> vsiteLocalCoordsPosVec;
for (int i = 0; i < numAtoms; i++) {
if (system.isVirtualSite(i)) {
if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
// A two particle average.
const TwoParticleAverageSite& site = dynamic_cast<const TwoParticleAverageSite&>(system.getVirtualSite(i));
vsite2AvgAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), 0));
vsite2AvgWeightVec.push_back(mm_double2(site.getWeight(0), site.getWeight(1)));
}
else if (dynamic_cast<const ThreeParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
// A three particle average.
const ThreeParticleAverageSite& site = dynamic_cast<const ThreeParticleAverageSite&>(system.getVirtualSite(i));
vsite3AvgAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
vsite3AvgWeightVec.push_back(mm_double4(site.getWeight(0), site.getWeight(1), site.getWeight(2), 0.0));
}
else if (dynamic_cast<const OutOfPlaneSite*>(&system.getVirtualSite(i)) != NULL) {
// An out of plane site.
const OutOfPlaneSite& site = dynamic_cast<const OutOfPlaneSite&>(system.getVirtualSite(i));
vsiteOutOfPlaneAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
vsiteOutOfPlaneWeightVec.push_back(mm_double4(site.getWeight12(), site.getWeight13(), site.getWeightCross(), 0.0));
}
else if (dynamic_cast<const LocalCoordinatesSite*>(&system.getVirtualSite(i)) != NULL) {
// A local coordinates site.
const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i));
int numParticles = site.getNumParticles();
vector<double> origin, x, y;
site.getOriginWeights(origin);
site.getXWeights(x);
site.getYWeights(y);
vsiteLocalCoordsIndexVec.push_back(i);
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
for (int j = 0; j < numParticles; j++) {
vsiteLocalCoordsAtomVec.push_back(site.getParticle(j));
vsiteLocalCoordsWeightVec.push_back(origin[j]);
vsiteLocalCoordsWeightVec.push_back(x[j]);
vsiteLocalCoordsWeightVec.push_back(y[j]);
}
Vec3 pos = site.getLocalPosition();
vsiteLocalCoordsPosVec.push_back(mm_double4(pos[0], pos[1], pos[2], 0.0));
}
}
}
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
int num2Avg = vsite2AvgAtomVec.size();
int num3Avg = vsite3AvgAtomVec.size();
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
int numLocalCoords = vsiteLocalCoordsPosVec.size();
numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords;
vsite2AvgAtoms.initialize<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite3AvgAtoms.initialize<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsiteOutOfPlaneAtoms.initialize<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteLocalCoordsIndex.initialize<cl_int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex");
vsiteLocalCoordsAtoms.initialize<cl_int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms");
vsiteLocalCoordsStartIndex.initialize<cl_int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex");
if (num2Avg > 0)
vsite2AvgAtoms.upload(vsite2AvgAtomVec);
if (num3Avg > 0)
vsite3AvgAtoms.upload(vsite3AvgAtomVec);
if (numOutOfPlane > 0)
vsiteOutOfPlaneAtoms.upload(vsiteOutOfPlaneAtomVec);
if (numLocalCoords > 0) {
vsiteLocalCoordsIndex.upload(vsiteLocalCoordsIndexVec);
vsiteLocalCoordsAtoms.upload(vsiteLocalCoordsAtomVec);
vsiteLocalCoordsStartIndex.upload(vsiteLocalCoordsStartVec);
}
int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
vsite2AvgWeights.initialize(context, max(1, num2Avg), 2*elementSize, "vsite2AvgWeights");
vsite3AvgWeights.initialize(context, max(1, num3Avg), 4*elementSize, "vsite3AvgWeights");
vsiteOutOfPlaneWeights.initialize(context, max(1, numOutOfPlane), 4*elementSize, "vsiteOutOfPlaneWeights");
vsiteLocalCoordsWeights.initialize(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), elementSize, "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos.initialize(context, max(1, (int) vsiteLocalCoordsPosVec.size()), 4*elementSize, "vsiteLocalCoordsPos");
if (num2Avg > 0)
vsite2AvgWeights.upload(vsite2AvgWeightVec, true, true);
if (num3Avg > 0)
vsite3AvgWeights.upload(vsite3AvgWeightVec, true, true);
if (numOutOfPlane > 0)
vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec, true, true);
if (numLocalCoords > 0) {
vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec, true, true);
vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec, true, true);
}
// If multiple virtual sites depend on the same particle, make sure the force distribution
// can be done safely.
vector<int> atomCounts(numAtoms, 0);
for (int i = 0; i < numAtoms; i++)
if (system.isVirtualSite(i))
for (int j = 0; j < system.getVirtualSite(i).getNumParticles(); j++)
atomCounts[system.getVirtualSite(i).getParticle(j)]++;
for (int i = 0; i < numAtoms; i++)
if (atomCounts[i] > 1)
hasOverlappingVsites = true;
if (hasOverlappingVsites && context.getUseDoublePrecision() && !context.getSupports64BitGlobalAtomics())
throw OpenMMException("This device does not support 64 bit atomics. Cannot use double precision when multiple virtual sites depend on the same atom.");
// Create the kernels for virtual sites.
map<string, string> defines;
defines["NUM_2_AVERAGE"] = context.intToString(num2Avg);
defines["NUM_3_AVERAGE"] = context.intToString(num3Avg);
defines["NUM_OUT_OF_PLANE"] = context.intToString(numOutOfPlane);
defines["NUM_LOCAL_COORDS"] = context.intToString(numLocalCoords);
defines["NUM_ATOMS"] = context.intToString(numAtoms);
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
if (hasOverlappingVsites)
defines["HAS_OVERLAPPING_VSITES"] = "1";
cl::Program vsiteProgram = context.createProgram(OpenCLKernelSources::virtualSites, defines);
vsitePositionKernel = cl::Kernel(vsiteProgram, "computeVirtualSites");
int index = 0;
vsitePositionKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
if (context.getUseMixedPrecision())
vsitePositionKernel.setArg<cl::Buffer>(index++, context.getPosqCorrection().getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite2AvgAtoms.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite2AvgWeights.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgAtoms.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos.getDeviceBuffer());
vsitePositionKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex.getDeviceBuffer());
vsiteForceKernel = cl::Kernel(vsiteProgram, "distributeForces");
index = 0;
vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
index++; // Skip argument 1: the force array hasn't been created yet.
if (context.getSupports64BitGlobalAtomics())
index++; // Skip argument 2: the force array hasn't been created yet.
if (context.getUseMixedPrecision())
vsiteForceKernel.setArg<cl::Buffer>(index++, context.getPosqCorrection().getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite2AvgAtoms.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite2AvgWeights.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgAtoms.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsite3AvgWeights.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneAtoms.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteOutOfPlaneWeights.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsIndex.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsAtoms.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsWeights.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsPos.getDeviceBuffer());
vsiteForceKernel.setArg<cl::Buffer>(index++, vsiteLocalCoordsStartIndex.getDeviceBuffer());
if (hasOverlappingVsites && context.getSupports64BitGlobalAtomics())
vsiteAddForcesKernel = cl::Kernel(vsiteProgram, "addDistributedForces");
}
void OpenCLIntegrationUtilities::setNextStepSize(double size) {
if (size != lastStepSize.x || size != lastStepSize.y) {
lastStepSize = mm_double2(size, size);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
stepSize.upload(&lastStepSize);
else {
mm_float2 lastStepSizeFloat = mm_float2((float) size, (float) size);
stepSize.upload(&lastStepSizeFloat);
}
}
/**
 * Get the posDelta array as an OpenCLArray. The context is cast to an
 * OpenCLContext, whose unwrap() yields the OpenCL-specific array object.
 */
OpenCLArray& OpenCLIntegrationUtilities::getPosDelta() {
return dynamic_cast<OpenCLContext&>(context).unwrap(posDelta);
}
double OpenCLIntegrationUtilities::getLastStepSize() {
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
stepSize.download(&lastStepSize);
else {
mm_float2 lastStepSizeFloat;
stepSize.download(&lastStepSizeFloat);
lastStepSize = mm_double2(lastStepSizeFloat.x, lastStepSizeFloat.y);
}
return lastStepSize.y;
}
void OpenCLIntegrationUtilities::applyConstraints(double tol) {
applyConstraints(false, tol);
/**
 * Get the random array as an OpenCLArray. The context is cast to an
 * OpenCLContext, whose unwrap() yields the OpenCL-specific array object.
 */
OpenCLArray& OpenCLIntegrationUtilities::getRandom() {
return dynamic_cast<OpenCLContext&>(context).unwrap(random);
}
void OpenCLIntegrationUtilities::applyVelocityConstraints(double tol) {
applyConstraints(true, tol);
/**
 * Get the stepSize array as an OpenCLArray. The context is cast to an
 * OpenCLContext, whose unwrap() yields the OpenCL-specific array object.
 */
OpenCLArray& OpenCLIntegrationUtilities::getStepSize() {
return dynamic_cast<OpenCLContext&>(context).unwrap(stepSize);
}
void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, double tol) {
bool hasInitialized;
cl::Kernel settleKernel, shakeKernel, ccmaForceKernel, ccmaUpdateKernel;
void OpenCLIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) {
ComputeKernel settleKernel, shakeKernel, ccmaForceKernel;
if (constrainVelocities) {
hasInitialized = hasInitializedVelConstraintKernels;
settleKernel = settleVelKernel;
shakeKernel = shakeVelKernel;
ccmaForceKernel = ccmaVelForceKernel;
ccmaUpdateKernel = ccmaVelUpdateKernel;
hasInitializedVelConstraintKernels = true;
}
else {
hasInitialized = hasInitializedPosConstraintKernels;
settleKernel = settlePosKernel;
shakeKernel = shakePosKernel;
ccmaForceKernel = ccmaPosForceKernel;
ccmaUpdateKernel = ccmaPosUpdateKernel;
hasInitializedPosConstraintKernels = true;
}
if (settleAtoms.isInitialized()) {
if (!hasInitialized) {
settleKernel.setArg<cl_int>(0, settleAtoms.getSize());
settleKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
if (context.getUseMixedPrecision())
settleKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer());
else
settleKernel.setArg<void*>(3, NULL);
settleKernel.setArg<cl::Buffer>(4, posDelta.getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(5, context.getVelm().getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(6, settleAtoms.getDeviceBuffer());
settleKernel.setArg<cl::Buffer>(7, settleParams.getDeviceBuffer());
}
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
settleKernel.setArg<cl_double>(1, (cl_double) tol);
settleKernel->setArg(1, tol);
else
settleKernel.setArg<cl_float>(1, (cl_float) tol);
context.executeKernel(settleKernel, settleAtoms.getSize());
settleKernel->setArg(1, (float) tol);
settleKernel->execute(settleAtoms.getSize());
}
if (shakeAtoms.isInitialized()) {
if (!hasInitialized) {
shakeKernel.setArg<cl_int>(0, shakeAtoms.getSize());
shakeKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
if (context.getUseMixedPrecision())
shakeKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer());
else
shakeKernel.setArg<void*>(3, NULL);
shakeKernel.setArg<cl::Buffer>(4, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta.getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(5, shakeAtoms.getDeviceBuffer());
shakeKernel.setArg<cl::Buffer>(6, shakeParams.getDeviceBuffer());
}
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
shakeKernel.setArg<cl_double>(1, (cl_double) tol);
shakeKernel->setArg(1, tol);
else
shakeKernel.setArg<cl_float>(1, (cl_float) tol);
context.executeKernel(shakeKernel, shakeAtoms.getSize());
shakeKernel->setArg(1, (float) tol);
shakeKernel->execute(shakeAtoms.getSize());
}
if (ccmaAtoms.isInitialized()) {
if (!hasInitialized) {
ccmaDirectionsKernel.setArg<cl::Buffer>(0, ccmaAtoms.getDeviceBuffer());
ccmaDirectionsKernel.setArg<cl::Buffer>(1, ccmaDistance.getDeviceBuffer());
ccmaDirectionsKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
if (context.getUseMixedPrecision())
ccmaDirectionsKernel.setArg<cl::Buffer>(3, context.getPosqCorrection().getDeviceBuffer());
else
ccmaDirectionsKernel.setArg<void*>(3, NULL);
ccmaDirectionsKernel.setArg<cl::Buffer>(4, ccmaConverged.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(0, ccmaAtoms.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(1, ccmaDistance.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(2, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(3, ccmaReducedMass.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(4, ccmaDelta1.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(5, ccmaConverged.getDeviceBuffer());
ccmaForceKernel.setArg<cl::Buffer>(6, ccmaConvergedHostBuffer.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(0, ccmaDelta1.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(1, ccmaDelta2.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(2, ccmaConstraintMatrixColumn.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(3, ccmaConstraintMatrixValue.getDeviceBuffer());
ccmaMultiplyKernel.setArg<cl::Buffer>(4, ccmaConverged.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(0, ccmaNumAtomConstraints.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(1, ccmaAtomConstraints.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(2, ccmaDistance.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(3, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(4, context.getVelm().getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(5, ccmaDelta1.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(6, ccmaDelta2.getDeviceBuffer());
ccmaUpdateKernel.setArg<cl::Buffer>(7, ccmaConverged.getDeviceBuffer());
}
ccmaForceKernel->setArg(6, ccmaConvergedHostBuffer);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
ccmaForceKernel.setArg<cl_double>(7, (cl_double) tol);
ccmaForceKernel->setArg(7, tol);
else
ccmaForceKernel.setArg<cl_float>(7, (cl_float) tol);
context.executeKernel(ccmaDirectionsKernel, ccmaAtoms.getSize());
ccmaForceKernel->setArg(7, (float) tol);
ccmaDirectionsKernel->execute(ccmaAtoms.getSize());
const int checkInterval = 4;
OpenCLContext& cl = dynamic_cast<OpenCLContext&>(context);
cl::CommandQueue queue = cl.getQueue();
int* converged = (int*) context.getPinnedBuffer();
int* ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int));
int* ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_int));
ccmaConvergedHostMemory[0] = 0;
context.getQueue().enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
ccmaUpdateKernel->setArg(3, constrainVelocities ? context.getVelm() : posDelta);
for (int i = 0; i < 150; i++) {
ccmaForceKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaForceKernel, ccmaAtoms.getSize());
ccmaForceKernel->setArg(8, i);
ccmaForceKernel->execute(ccmaAtoms.getSize());
cl::Event event;
if ((i+1)%checkInterval == 0 && !ccmaUseDirectBuffer)
context.getQueue().enqueueReadBuffer(ccmaConverged.getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(cl_int), converged, NULL, &event);
ccmaMultiplyKernel.setArg<cl_int>(5, i);
context.executeKernel(ccmaMultiplyKernel, ccmaAtoms.getSize());
ccmaUpdateKernel.setArg<cl_int>(8, i);
context.executeKernel(ccmaUpdateKernel, context.getNumAtoms());
queue.enqueueReadBuffer(cl.unwrap(ccmaConverged).getDeviceBuffer(), CL_FALSE, 0, 2*sizeof(int), converged, NULL, &event);
ccmaMultiplyKernel->setArg(5, i);
ccmaMultiplyKernel->execute(ccmaAtoms.getSize());
ccmaUpdateKernel->setArg(8, i);
ccmaUpdateKernel->execute(context.getNumAtoms());
if ((i+1)%checkInterval == 0) {
if (ccmaUseDirectBuffer) {
ccmaConvergedHostMemory = (int*) context.getQueue().enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), NULL, &event);
context.getQueue().flush();
ccmaConvergedHostMemory = (int*) queue.enqueueMapBuffer(ccmaConvergedHostBuffer.getDeviceBuffer(), CL_FALSE, CL_MAP_READ, 0, sizeof(cl_int), NULL, &event);
queue.flush();
while (event.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() != CL_COMPLETE)
;
converged[i%2] = ccmaConvergedHostMemory[0];
context.getQueue().enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
queue.enqueueUnmapMemObject(ccmaConvergedHostBuffer.getDeviceBuffer(), ccmaConvergedHostMemory);
}
else
event.wait();
......@@ -767,198 +119,12 @@ void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, doub
}
}
/**
 * Launch the kernel that computes virtual site positions, using one work item
 * per virtual site.
 */
void OpenCLIntegrationUtilities::computeVirtualSites() {
    // Nothing to launch when there are no virtual sites.
    if (!(numVsites > 0))
        return;
    context.executeKernel(vsitePositionKernel, numVsites);
}
void OpenCLIntegrationUtilities::distributeForcesFromVirtualSites() {
if (numVsites > 0) {
// Set arguments that didn't exist yet in the constructor.
vsiteForceKernel.setArg<cl::Buffer>(1, context.getForce().getDeviceBuffer());
if (context.getSupports64BitGlobalAtomics()) {
vsiteForceKernel.setArg<cl::Buffer>(2, context.getLongForceBuffer().getDeviceBuffer());
if (hasOverlappingVsites) {
// We'll be using 64 bit atomics for the force redistribution, so clear the buffer.
context.clearBuffer(context.getLongForceBuffer());
}
}
context.executeKernel(vsiteForceKernel, numVsites);
if (context.getSupports64BitGlobalAtomics() && hasOverlappingVsites) {
// Add the redistributed forces from the virtual sites to the main force array.
vsiteAddForcesKernel.setArg<cl::Buffer>(0, context.getLongForceBuffer().getDeviceBuffer());
vsiteAddForcesKernel.setArg<cl::Buffer>(1, context.getForce().getDeviceBuffer());
context.executeKernel(vsiteAddForcesKernel, context.getNumAtoms());
}
}
vsiteForceKernel->setArg(2, context.getLongForceBuffer());
vsiteForceKernel->execute(numVsites);
vsiteSaveForcesKernel->setArg(0, context.getLongForceBuffer());
vsiteSaveForcesKernel->setArg(1, context.getForceBuffers());
vsiteSaveForcesKernel->execute(context.getNumAtoms());
}
}
void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) {
if (random.isInitialized()) {
if (randomNumberSeed != lastSeed)
throw OpenMMException("OpenCLIntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed");
return;
}
// Create the random number arrays.
lastSeed = randomNumberSeed;
random.initialize<mm_float4>(context, 4*context.getPaddedNumAtoms(), "random");
randomSeed.initialize<mm_int4>(context, context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
randomPos = random.getSize();
// Use a quick and dirty RNG to pick seeds for the real random number generator.
vector<mm_int4> seed(randomSeed.getSize());
unsigned int r = randomNumberSeed;
// A seed of 0 means use a unique one
if (r == 0) r = (unsigned int) osrngseed();
for (int i = 0; i < randomSeed.getSize(); i++) {
seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
}
randomSeed.upload(seed);
// Create the kernel.
cl::Program randomProgram = context.createProgram(OpenCLKernelSources::random);
randomKernel = cl::Kernel(randomProgram, "generateRandomNumbers");
}
/**
 * Make sure at least numValues unused random values are available.
 *
 * @param numValues  the number of values the caller is going to consume
 * @return the index in the random array at which the caller's values start
 */
int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) {
    const bool enoughLeft = (randomPos+numValues <= random.getSize());
    if (!enoughLeft) {
        // Not enough unused values remain: grow the array if this request is
        // larger than it, then refill the whole array.
        if (numValues > random.getSize())
            random.resize(numValues);
        randomKernel.setArg<cl_int>(0, random.getSize());
        randomKernel.setArg<cl::Buffer>(1, random.getDeviceBuffer());
        randomKernel.setArg<cl::Buffer>(2, randomSeed.getDeviceBuffer());
        context.executeKernel(randomKernel, random.getSize());
        randomPos = numValues;
        return 0;
    }

    // Hand out the next unused stretch of the existing values.
    int start = randomPos;
    randomPos += numValues;
    return start;
}
/**
 * Write the integration state to a checkpoint stream. The layout is: the
 * number of Nose-Hoover chains; for each chain its ID, length and state
 * values; and, only if the random number generator has been initialized,
 * the position in the random array followed by the random values and seeds.
 *
 * @param stream  the stream to write the checkpoint to
 */
void OpenCLIntegrationUtilities::createCheckpoint(ostream& stream) {
    size_t numChains = noseHooverChainState.size();
    const bool useDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
    stream.write((char*) &numChains, sizeof(size_t));
    for (auto& entry : noseHooverChainState) {
        int chainID = entry.first;
        OpenCLArray& state = entry.second;
        size_t chainLength = state.getSize();
        stream.write((char*) &chainID, sizeof(int));
        stream.write((char*) &chainLength, sizeof(size_t));
        // The element type written depends on the precision mode.
        if (!useDouble) {
            vector<mm_float2> values;
            state.download(values);
            stream.write((char*) values.data(), sizeof(mm_float2)*chainLength);
        }
        else {
            vector<mm_double2> values;
            state.download(values);
            stream.write((char*) values.data(), sizeof(mm_double2)*chainLength);
        }
    }

    // The random number state is only recorded once the generator exists.
    if (random.isInitialized()) {
        stream.write((char*) &randomPos, sizeof(int));
        vector<mm_float4> randomValues;
        random.download(randomValues);
        stream.write((char*) &randomValues[0], sizeof(mm_float4)*random.getSize());
        vector<mm_int4> seedValues;
        randomSeed.download(seedValues);
        stream.write((char*) &seedValues[0], sizeof(mm_int4)*randomSeed.getSize());
    }
}
/**
 * Restore the integration state from a checkpoint stream written by
 * createCheckpoint(). All existing Nose-Hoover chain states are discarded and
 * rebuilt from the stream. The random number state is read only if the
 * random number generator has already been initialized.
 *
 * @param stream  the stream to read the checkpoint from
 */
void OpenCLIntegrationUtilities::loadCheckpoint(istream& stream) {
    size_t numChains, chainLength;
    const bool useDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
    stream.read((char*) &numChains, sizeof(size_t));
    noseHooverChainState.clear();
    for (size_t i = 0; i < numChains; i++) {
        int chainID;
        stream.read((char*) &chainID, sizeof(int));
        stream.read((char*) &chainLength, sizeof(size_t));

        // Start from a fresh array for this chain, whatever the precision mode.
        noseHooverChainState[chainID] = OpenCLArray();
        OpenCLArray& chain = noseHooverChainState[chainID];
        const std::string chainName = "chainState" + std::to_string(chainID);

        // data() is used rather than &vec[0] so that a zero length chain does
        // not index into an empty vector.
        if (useDouble) {
            chain.initialize<mm_double2>(context, chainLength, chainName);
            std::vector<mm_double2> stateVec(chainLength);
            stream.read((char*) stateVec.data(), sizeof(mm_double2)*chainLength);
            chain.upload(stateVec);
        }
        else {
            chain.initialize<mm_float2>(context, chainLength, chainName);
            std::vector<mm_float2> stateVec(chainLength);
            stream.read((char*) stateVec.data(), sizeof(mm_float2)*chainLength);
            chain.upload(stateVec);
        }
    }

    // The random number state is only present if the generator exists.
    if (!random.isInitialized())
        return;
    stream.read((char*) &randomPos, sizeof(int));
    vector<mm_float4> randomVec(random.getSize());
    stream.read((char*) randomVec.data(), sizeof(mm_float4)*random.getSize());
    random.upload(randomVec);
    vector<mm_int4> randomSeedVec(randomSeed.getSize());
    stream.read((char*) randomSeedVec.data(), sizeof(mm_int4)*randomSeed.getSize());
    randomSeed.upload(randomSeedVec);
}
double OpenCLIntegrationUtilities::computeKineticEnergy(double timeShift) {
int numParticles = context.getNumAtoms();
if (timeShift != 0) {
// Copy the velocities into the posDelta array while we temporarily modify them.
context.getVelm().copyTo(posDelta);
// Apply the time shift.
timeShiftKernel.setArg<cl::Buffer>(0, context.getVelm().getDeviceBuffer());
timeShiftKernel.setArg<cl::Buffer>(1, context.getForce().getDeviceBuffer());
if (context.getUseDoublePrecision())
timeShiftKernel.setArg<cl_double>(2, timeShift);
else
timeShiftKernel.setArg<cl_float>(2, (cl_float) timeShift);
context.executeKernel(timeShiftKernel, numParticles);
applyConstraints(true, 1e-4);
}
// Compute the kinetic energy.
double energy = 0.0;
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
vector<mm_double4> velm;
context.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) {
mm_double4 v = velm[i];
if (v.w != 0)
energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
}
}
else {
vector<mm_float4> velm;
context.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) {
mm_float4 v = velm[i];
if (v.w != 0)
energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
}
}
// Restore the velocities.
if (timeShift != 0)
posDelta.copyTo(context.getVelm());
return 0.5*energy;
}
/**
 * Get the map from chain ID to the array holding that Nose-Hoover chain's state.
 * The map is returned by reference, so callers can modify it.
 */
std::map<int, OpenCLArray>& OpenCLIntegrationUtilities::getNoseHooverChainState() {
    return noseHooverChainState;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLKernel.h"
#include "openmm/common/ComputeArray.h"
using namespace OpenMM;
using namespace std;
/**
 * Create an OpenCLKernel wrapping a cl::Kernel.
 *
 * @param context  the context the kernel belongs to; stored in the context member
 * @param kernel   the OpenCL kernel object to wrap; stored in the kernel member
 */
OpenCLKernel::OpenCLKernel(OpenCLContext& context, cl::Kernel kernel) : context(context), kernel(kernel) {
}
/**
 * Get the name of the kernel, queried from the wrapped cl::Kernel as its
 * CL_KERNEL_FUNCTION_NAME property.
 */
string OpenCLKernel::getName() const {
return kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
}
/**
 * Bind all array arguments and launch the kernel.
 *
 * @param threads    the number of threads, passed through to the context's executeKernel()
 * @param blockSize  the block size, passed through to the context's executeKernel()
 */
void OpenCLKernel::execute(int threads, int blockSize) {
    // Arguments given as OpenCLArrays are bound only now, just before the launch.
    // Binding them any earlier would be unsafe: resize() may be called on an array
    // in the meantime, which recreates its internal storage.
    const int numArgs = (int) arrayArgs.size();
    for (int slot = 0; slot < numArgs; slot++) {
        if (arrayArgs[slot] == NULL)
            continue; // this slot has no array bound to it
        kernel.setArg<cl::Buffer>(slot, arrayArgs[slot]->getDeviceBuffer());
    }
    context.executeKernel(kernel, threads, blockSize);
}
/**
 * Append a new argument to the kernel and bind an array to it.
 *
 * @param value  the array to pass as the new argument
 */
void OpenCLKernel::addArrayArg(ArrayInterface& value) {
    // The new argument takes the slot just past the current last one.
    const int newSlot = arrayArgs.size();
    addEmptyArg();
    setArrayArg(newSlot, value);
}
/**
 * Append a new argument to the kernel and set it to a primitive value.
 *
 * @param value  pointer to the value to pass as the new argument
 * @param size   the size of the value, forwarded to setPrimitiveArg()
 */
void OpenCLKernel::addPrimitiveArg(const void* value, int size) {
    // The new argument takes the slot just past the current last one.
    const int newSlot = arrayArgs.size();
    addEmptyArg();
    setPrimitiveArg(newSlot, value, size);
}
/**
 * Append a placeholder argument with no array bound to it. The slot is
 * recorded as NULL, which execute() skips when binding array arguments.
 */
void OpenCLKernel::addEmptyArg() {
arrayArgs.push_back(NULL);
}
/**
 * Bind an array to an existing argument slot. Only a pointer to the unwrapped
 * array is recorded here; the device buffer is passed to the kernel later, in
 * execute().
 *
 * @param index  the index of the argument to set
 * @param value  the array to pass for that argument
 */
void OpenCLKernel::setArrayArg(int index, ArrayInterface& value) {
arrayArgs[index] = &context.unwrap(value);
}
/**
 * Set an existing argument slot to a primitive value. The value is handed to
 * the wrapped cl::Kernel immediately.
 *
 * @param index  the index of the argument to set
 * @param value  pointer to the value to pass
 * @param size   the size passed to cl::Kernel::setArg() for the value
 */
void OpenCLKernel::setPrimitiveArg(int index, const void* value, int size) {
// The const_cast is needed because of a bug in the OpenCL C++ wrappers. clSetKernelArg()
// declares the value to be const, but the C++ wrapper doesn't.
kernel.setArg(index, size, const_cast<void*>(value));
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2018 Stanford University and the Authors. *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -26,6 +26,7 @@
#include "OpenCLKernelFactory.h"
#include "OpenCLParallelKernels.h"
#include "openmm/common/CommonKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
......@@ -75,61 +76,61 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if (name == VirtualSitesKernel::Name())
return new OpenCLVirtualSitesKernel(name, platform, cl);
if (name == CalcHarmonicBondForceKernel::Name())
return new OpenCLCalcHarmonicBondForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcHarmonicBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomBondForceKernel::Name())
return new OpenCLCalcCustomBondForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcHarmonicAngleForceKernel::Name())
return new OpenCLCalcHarmonicAngleForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcHarmonicAngleForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomAngleForceKernel::Name())
return new OpenCLCalcCustomAngleForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomAngleForceKernel(name, platform, cl, context.getSystem());
if (name == CalcPeriodicTorsionForceKernel::Name())
return new OpenCLCalcPeriodicTorsionForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcPeriodicTorsionForceKernel(name, platform, cl, context.getSystem());
if (name == CalcRBTorsionForceKernel::Name())
return new OpenCLCalcRBTorsionForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcRBTorsionForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCMAPTorsionForceKernel::Name())
return new OpenCLCalcCMAPTorsionForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCMAPTorsionForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomTorsionForceKernel::Name())
return new OpenCLCalcCustomTorsionForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomTorsionForceKernel(name, platform, cl, context.getSystem());
if (name == CalcNonbondedForceKernel::Name())
return new OpenCLCalcNonbondedForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomNonbondedForceKernel::Name())
return new OpenCLCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
if (name == CalcGBSAOBCForceKernel::Name())
return new OpenCLCalcGBSAOBCForceKernel(name, platform, cl);
return new CommonCalcGBSAOBCForceKernel(name, platform, cl);
if (name == CalcCustomGBForceKernel::Name())
return new OpenCLCalcCustomGBForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomGBForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomExternalForceKernel::Name())
return new OpenCLCalcCustomExternalForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomExternalForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomHbondForceKernel::Name())
return new OpenCLCalcCustomHbondForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomHbondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCentroidBondForceKernel::Name())
return new OpenCLCalcCustomCentroidBondForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomCentroidBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCompoundBondForceKernel::Name())
return new OpenCLCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCVForceKernel::Name())
return new OpenCLCalcCustomCVForceKernel(name, platform, cl);
if (name == CalcRMSDForceKernel::Name())
return new OpenCLCalcRMSDForceKernel(name, platform, cl);
return new CommonCalcRMSDForceKernel(name, platform, cl);
if (name == CalcCustomManyParticleForceKernel::Name())
return new OpenCLCalcCustomManyParticleForceKernel(name, platform, cl, context.getSystem());
return new CommonCalcCustomManyParticleForceKernel(name, platform, cl, context.getSystem());
if (name == CalcGayBerneForceKernel::Name())
return new OpenCLCalcGayBerneForceKernel(name, platform, cl);
return new CommonCalcGayBerneForceKernel(name, platform, cl);
if (name == IntegrateVerletStepKernel::Name())
return new OpenCLIntegrateVerletStepKernel(name, platform, cl);
return new CommonIntegrateVerletStepKernel(name, platform, cl);
if (name == IntegrateLangevinStepKernel::Name())
return new OpenCLIntegrateLangevinStepKernel(name, platform, cl);
return new CommonIntegrateLangevinStepKernel(name, platform, cl);
if (name == IntegrateBAOABStepKernel::Name())
return new OpenCLIntegrateBAOABStepKernel(name, platform, cl);
return new CommonIntegrateBAOABStepKernel(name, platform, cl);
if (name == IntegrateBrownianStepKernel::Name())
return new OpenCLIntegrateBrownianStepKernel(name, platform, cl);
return new CommonIntegrateBrownianStepKernel(name, platform, cl);
if (name == IntegrateVariableVerletStepKernel::Name())
return new OpenCLIntegrateVariableVerletStepKernel(name, platform, cl);
return new CommonIntegrateVariableVerletStepKernel(name, platform, cl);
if (name == IntegrateVariableLangevinStepKernel::Name())
return new OpenCLIntegrateVariableLangevinStepKernel(name, platform, cl);
return new CommonIntegrateVariableLangevinStepKernel(name, platform, cl);
if (name == IntegrateCustomStepKernel::Name())
return new OpenCLIntegrateCustomStepKernel(name, platform, cl);
return new CommonIntegrateCustomStepKernel(name, platform, cl);
if (name == ApplyAndersenThermostatKernel::Name())
return new OpenCLApplyAndersenThermostatKernel(name, platform, cl);
return new CommonApplyAndersenThermostatKernel(name, platform, cl);
if (name == NoseHooverChainKernel::Name())
return new OpenCLNoseHooverChainKernel(name, platform, cl);
if (name == IntegrateVelocityVerletStepKernel::Name())
......@@ -137,6 +138,6 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if (name == ApplyMonteCarloBarostatKernel::Name())
return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl);
if (name == RemoveCMMotionKernel::Name())
return new OpenCLRemoveCMMotionKernel(name, platform, cl);
return new CommonRemoveCMMotionKernel(name, platform, cl);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
}
......@@ -27,7 +27,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "windowsExportOpenCL.h"
#include "openmm/common/windowsExportCommon.h"
#include <string>
namespace OpenMM {
......@@ -38,9 +38,9 @@ namespace OpenMM {
* kernels subfolder.
*/
class OPENMM_EXPORT_OPENCL OpenCLKernelSources {
class OPENMM_EXPORT_COMMON OpenCLKernelSources {
public:
@CL_FILE_DECLARATIONS@
@KERNEL_FILE_DECLARATIONS@
};
} // namespace OpenMM
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLArray.h"
#include "OpenCLContext.h"
#include "OpenCLKernelSources.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLSort.h"
......@@ -124,10 +125,20 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic
}
}
/**
 * Add a per-atom parameter described by a ComputeParameterInfo. It is converted
 * to a ParameterInfo that refers to the device buffer of the unwrapped array.
 *
 * @param parameter  the description of the parameter to add
 */
void OpenCLNonbondedUtilities::addParameter(ComputeParameterInfo parameter) {
    ParameterInfo converted(parameter.getName(), parameter.getComponentType(), parameter.getNumComponents(),
            parameter.getSize(), context.unwrap(parameter.getArray()).getDeviceBuffer());
    parameters.push_back(converted);
}
/**
 * Add a parameter that is already described by a ParameterInfo. A copy of it
 * is appended to the list of parameters.
 *
 * @param parameter  the description of the parameter to add
 */
void OpenCLNonbondedUtilities::addParameter(const ParameterInfo& parameter) {
parameters.push_back(parameter);
}
/**
 * Add an argument described by a ComputeParameterInfo. It is converted to a
 * ParameterInfo that refers to the device buffer of the unwrapped array.
 *
 * @param parameter  the description of the argument to add
 */
void OpenCLNonbondedUtilities::addArgument(ComputeParameterInfo parameter) {
    ParameterInfo converted(parameter.getName(), parameter.getComponentType(), parameter.getNumComponents(),
            parameter.getSize(), context.unwrap(parameter.getArray()).getDeviceBuffer());
    arguments.push_back(converted);
}
/**
 * Add an argument that is already described by a ParameterInfo. A copy of it
 * is appended to the list of arguments.
 *
 * @param parameter  the description of the argument to add
 */
void OpenCLNonbondedUtilities::addArgument(const ParameterInfo& parameter) {
arguments.push_back(parameter);
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. *
* Portions copyright (c) 2011-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -156,7 +156,7 @@ void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& cont
for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0;
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new BeginComputationTask(context, cl, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionMemory, tileCounts[i]));
}
}
......@@ -164,7 +164,7 @@ void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& cont
double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new FinishComputationTask(context, cl, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceMemory, valid, tileCounts[i]));
}
data.syncContexts();
......@@ -210,7 +210,7 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
class OpenCLParallelCalcHarmonicBondForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcHarmonicBondForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcHarmonicBondForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -219,7 +219,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcHarmonicBondForceKernel& kernel;
CommonCalcHarmonicBondForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -227,7 +227,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcHarmonicBondForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcHarmonicBondForceKernel::OpenCLParallelCalcHarmonicBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcHarmonicBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcHarmonicBondForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcHarmonicBondForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
......@@ -238,7 +238,7 @@ void OpenCLParallelCalcHarmonicBondForceKernel::initialize(const System& system,
double OpenCLParallelCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -251,7 +251,7 @@ void OpenCLParallelCalcHarmonicBondForceKernel::copyParametersToContext(ContextI
class OpenCLParallelCalcCustomBondForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomBondForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcCustomBondForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -260,7 +260,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcCustomBondForceKernel& kernel;
CommonCalcCustomBondForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -268,7 +268,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcCustomBondForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcCustomBondForceKernel::OpenCLParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcCustomBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcCustomBondForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcCustomBondForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
......@@ -279,7 +279,7 @@ void OpenCLParallelCalcCustomBondForceKernel::initialize(const System& system, c
double OpenCLParallelCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -292,7 +292,7 @@ void OpenCLParallelCalcCustomBondForceKernel::copyParametersToContext(ContextImp
class OpenCLParallelCalcHarmonicAngleForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcHarmonicAngleForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcHarmonicAngleForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -301,7 +301,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcHarmonicAngleForceKernel& kernel;
CommonCalcHarmonicAngleForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -309,7 +309,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcHarmonicAngleForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcHarmonicAngleForceKernel::OpenCLParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcHarmonicAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcHarmonicAngleForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcHarmonicAngleForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
......@@ -320,7 +320,7 @@ void OpenCLParallelCalcHarmonicAngleForceKernel::initialize(const System& system
double OpenCLParallelCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -333,7 +333,7 @@ void OpenCLParallelCalcHarmonicAngleForceKernel::copyParametersToContext(Context
class OpenCLParallelCalcCustomAngleForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomAngleForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcCustomAngleForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -342,7 +342,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcCustomAngleForceKernel& kernel;
CommonCalcCustomAngleForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -350,7 +350,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcCustomAngleForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcCustomAngleForceKernel::OpenCLParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcCustomAngleForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcCustomAngleForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcCustomAngleForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
......@@ -361,7 +361,7 @@ void OpenCLParallelCalcCustomAngleForceKernel::initialize(const System& system,
double OpenCLParallelCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -374,7 +374,7 @@ void OpenCLParallelCalcCustomAngleForceKernel::copyParametersToContext(ContextIm
class OpenCLParallelCalcPeriodicTorsionForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcPeriodicTorsionForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcPeriodicTorsionForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -383,7 +383,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcPeriodicTorsionForceKernel& kernel;
CommonCalcPeriodicTorsionForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -391,7 +391,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcPeriodicTorsionForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcPeriodicTorsionForceKernel::OpenCLParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcPeriodicTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcPeriodicTorsionForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcPeriodicTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
......@@ -402,7 +402,7 @@ void OpenCLParallelCalcPeriodicTorsionForceKernel::initialize(const System& syst
double OpenCLParallelCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -415,7 +415,7 @@ void OpenCLParallelCalcPeriodicTorsionForceKernel::copyParametersToContext(Conte
class OpenCLParallelCalcRBTorsionForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcRBTorsionForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcRBTorsionForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -424,7 +424,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcRBTorsionForceKernel& kernel;
CommonCalcRBTorsionForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -432,7 +432,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcRBTorsionForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcRBTorsionForceKernel::OpenCLParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcRBTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcRBTorsionForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcRBTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
......@@ -443,7 +443,7 @@ void OpenCLParallelCalcRBTorsionForceKernel::initialize(const System& system, co
double OpenCLParallelCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -456,7 +456,7 @@ void OpenCLParallelCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl
class OpenCLParallelCalcCMAPTorsionForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCMAPTorsionForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcCMAPTorsionForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -465,7 +465,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcCMAPTorsionForceKernel& kernel;
CommonCalcCMAPTorsionForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -473,7 +473,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcCMAPTorsionForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcCMAPTorsionForceKernel::OpenCLParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcCMAPTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcCMAPTorsionForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcCMAPTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
......@@ -484,7 +484,7 @@ void OpenCLParallelCalcCMAPTorsionForceKernel::initialize(const System& system,
double OpenCLParallelCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -497,7 +497,7 @@ void OpenCLParallelCalcCMAPTorsionForceKernel::copyParametersToContext(ContextIm
class OpenCLParallelCalcCustomTorsionForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomTorsionForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcCustomTorsionForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -506,7 +506,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcCustomTorsionForceKernel& kernel;
CommonCalcCustomTorsionForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -514,7 +514,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcCustomTorsionForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcCustomTorsionForceKernel::OpenCLParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcCustomTorsionForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcCustomTorsionForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcCustomTorsionForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
......@@ -525,7 +525,7 @@ void OpenCLParallelCalcCustomTorsionForceKernel::initialize(const System& system
double OpenCLParallelCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -566,7 +566,7 @@ void OpenCLParallelCalcNonbondedForceKernel::initialize(const System& system, co
double OpenCLParallelCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, includeDirect, includeReciprocal, data.contextEnergy[i]));
}
return 0.0;
......@@ -587,7 +587,7 @@ void OpenCLParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, i
class OpenCLParallelCalcCustomNonbondedForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomNonbondedForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcCustomNonbondedForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -596,7 +596,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcCustomNonbondedForceKernel& kernel;
CommonCalcCustomNonbondedForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -604,7 +604,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcCustomNonbondedForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcCustomNonbondedForceKernel::OpenCLParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcCustomNonbondedForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcCustomNonbondedForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcCustomNonbondedForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
......@@ -615,7 +615,7 @@ void OpenCLParallelCalcCustomNonbondedForceKernel::initialize(const System& syst
double OpenCLParallelCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -628,7 +628,7 @@ void OpenCLParallelCalcCustomNonbondedForceKernel::copyParametersToContext(Conte
class OpenCLParallelCalcCustomExternalForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomExternalForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcCustomExternalForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -637,7 +637,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcCustomExternalForceKernel& kernel;
CommonCalcCustomExternalForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -645,7 +645,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcCustomExternalForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcCustomExternalForceKernel::OpenCLParallelCalcCustomExternalForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcCustomExternalForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcCustomExternalForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcCustomExternalForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
......@@ -656,7 +656,7 @@ void OpenCLParallelCalcCustomExternalForceKernel::initialize(const System& syste
double OpenCLParallelCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -669,7 +669,7 @@ void OpenCLParallelCalcCustomExternalForceKernel::copyParametersToContext(Contex
class OpenCLParallelCalcCustomHbondForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomHbondForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcCustomHbondForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -678,7 +678,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcCustomHbondForceKernel& kernel;
CommonCalcCustomHbondForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -686,7 +686,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcCustomHbondForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcCustomHbondForceKernel::OpenCLParallelCalcCustomHbondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcCustomHbondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcCustomHbondForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcCustomHbondForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomHbondForceKernel::initialize(const System& system, const CustomHbondForce& force) {
......@@ -697,7 +697,7 @@ void OpenCLParallelCalcCustomHbondForceKernel::initialize(const System& system,
double OpenCLParallelCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......@@ -710,7 +710,7 @@ void OpenCLParallelCalcCustomHbondForceKernel::copyParametersToContext(ContextIm
class OpenCLParallelCalcCustomCompoundBondForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomCompoundBondForceKernel& kernel, bool includeForce,
Task(ContextImpl& context, CommonCalcCustomCompoundBondForceKernel& kernel, bool includeForce,
bool includeEnergy, double& energy) : context(context), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
}
......@@ -719,7 +719,7 @@ public:
}
private:
ContextImpl& context;
OpenCLCalcCustomCompoundBondForceKernel& kernel;
CommonCalcCustomCompoundBondForceKernel& kernel;
bool includeForce, includeEnergy;
double& energy;
};
......@@ -727,7 +727,7 @@ private:
/**
 * Construct the parallel wrapper: forwards name and platform to the CalcCustomCompoundBondForceKernel base,
 * keeps a reference to the platform data, and fills `kernels` with Kernel handles that wrap newly
 * allocated per-context kernel objects built from the entries of data.contexts.
 */
OpenCLParallelCalcCustomCompoundBondForceKernel::OpenCLParallelCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, OpenCLPlatform::PlatformData& data, const System& system) :
CalcCustomCompoundBondForceKernel(name, platform), data(data) {
for (int i = 0; i < (int) data.contexts.size(); i++)
// NOTE(review): the two push_back lines below look like the before/after sides of a diff hunk
// (OpenCL-specific kernel replaced by the Common one); presumably only the second should remain - confirm.
kernels.push_back(Kernel(new OpenCLCalcCustomCompoundBondForceKernel(name, platform, *data.contexts[i], system)));
kernels.push_back(Kernel(new CommonCalcCustomCompoundBondForceKernel(name, platform, *data.contexts[i], system)));
}
void OpenCLParallelCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
......@@ -738,7 +738,7 @@ void OpenCLParallelCalcCustomCompoundBondForceKernel::initialize(const System& s
double OpenCLParallelCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
OpenCLContext& cl = *data.contexts[i];
OpenCLContext::WorkThread& thread = cl.getWorkThread();
ComputeContext::WorkThread& thread = cl.getWorkThread();
thread.addTask(new Task(context, getKernel(i), includeForces, includeEnergy, data.contextEnergy[i]));
}
return 0.0;
......
......@@ -25,181 +25,13 @@
* -------------------------------------------------------------------------- */
#include "OpenCLParameterSet.h"
#include "openmm/OpenMMException.h"
#include <cmath>
#include <sstream>
using namespace OpenMM;
using namespace std;
/**
 * Create a parameter set and allocate the device buffers that back it.
 *
 * Parameters are packed into buffers named "param1", "param2", ... Unless bufferPerParameter
 * is set, parameters are grouped four per buffer while more than two remain, then two per buffer
 * if more than one remains; any parameter still left over gets a single-component buffer.
 * Each buffer holds numObjects elements of (components * elementSize) bytes.
 *
 * @param context             the context in which to allocate the buffers
 * @param numParameters       the number of parameters stored for each object
 * @param numObjects          the number of objects to store parameters for
 * @param name                a name for the set, used in error messages
 * @param bufferPerParameter  if true, every parameter gets its own single-component buffer
 * @param useDoublePrecision  if true, elements are doubles; otherwise floats
 * @throws OpenMMException if OpenCL reports an error while creating a buffer
 */
OpenCLParameterSet::OpenCLParameterSet(OpenCLContext& context, int numParameters, int numObjects, const string& name, bool bufferPerParameter, bool useDoublePrecision) :
        context(context), numParameters(numParameters), numObjects(numObjects), name(name) {
    elementSize = (useDoublePrecision ? sizeof(double) : sizeof(float));
    const string elementType = (useDoublePrecision ? "double" : "float");
    int remaining = numParameters;
    int bufferCount = 0;

    // Allocate one buffer with `width` components per object and record it in `buffers`.
    auto addBuffer = [&](int width) {
        cl::Buffer* buf = new cl::Buffer(context.getContext(), CL_MEM_READ_WRITE, numObjects*elementSize*width);
        try {
            std::stringstream bufferName;
            bufferName << "param" << (++bufferCount);
            buffers.push_back(OpenCLNonbondedUtilities::ParameterInfo(bufferName.str(), elementType, width, elementSize*width, *buf));
        }
        catch (...) {
            // The buffer was never recorded, so nobody else would free it.
            delete buf;
            throw;
        }
        remaining -= width;
    };

    // The destructor does not run when a constructor throws, so buffers that were
    // already recorded must be released by hand before the exception leaves.
    auto releaseBuffers = [&]() {
        for (int i = 0; i < (int) buffers.size(); i++)
            delete &buffers[i].getMemory();
        buffers.clear();
    };

    try {
        if (!bufferPerParameter) {
            while (remaining > 2)
                addBuffer(4);
            if (remaining > 1)
                addBuffer(2);
        }
        while (remaining > 0)
            addBuffer(1);
    }
    catch (const cl::Error& err) {
        releaseBuffers();
        stringstream str;
        str<<"Error creating parameter set "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
        throw OpenMMException(str.str());
    }
    catch (...) {
        releaseBuffers();
        throw;
    }
}
/**
 * Delete the memory object referenced by every entry of `buffers`.
 */
OpenCLParameterSet::~OpenCLParameterSet() {
    for (auto& info : buffers)
        delete &info.getMemory();
}
template <class T>
void OpenCLParameterSet::getParameterValues(vector<vector<T> >& values) const {
if (sizeof(T) != elementSize)
throw OpenMMException("Called getParameterValues() with vector of wrong type");
values.resize(numObjects);
for (int i = 0; i < numObjects; i++)
values[i].resize(numParameters);
try {
int base = 0;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getSize() == 4*elementSize) {
vector<T> data(4*numObjects);
context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) {
values[j][base] = data[4*j];
if (base+1 < numParameters)
values[j][base+1] = data[4*j+1];
if (base+2 < numParameters)
values[j][base+2] = data[4*j+2];
if (base+3 < numParameters)
values[j][base+3] = data[4*j+3];
}
base += 4;
}
else if (buffers[i].getSize() == 2*elementSize) {
vector<T> data(2*numObjects);
context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) {
values[j][base] = data[2*j];
if (base+1 < numParameters)
values[j][base+1] = data[2*j+1];
}
base += 2;
}
else if (buffers[i].getSize() == elementSize) {
vector<T> data(numObjects);
context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++)
values[j][base] = data[j];
base++;
}
else
throw OpenMMException("Internal error: Unknown buffer type in OpenCLParameterSet");
}
}
catch (cl::Error err) {
stringstream str;
str<<"Error downloading parameter set "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
ComputeParameterSet(context, numParameters, numObjects, name, bufferPerParameter, useDoublePrecision) {
for (auto& info : getParameterInfos()) {
buffers.push_back(OpenCLNonbondedUtilities::ParameterInfo(info.getName(), info.getComponentType(), info.getNumComponents(), info.getSize(), context.unwrap(info.getArray()).getDeviceBuffer()));
}
}
template <class T>
void OpenCLParameterSet::setParameterValues(const vector<vector<T> >& values) {
if (sizeof(T) != elementSize)
throw OpenMMException("Called setParameterValues() with vector of wrong type");
try {
int base = 0;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getSize() == 4*elementSize) {
vector<T> data(4*numObjects);
for (int j = 0; j < numObjects; j++) {
data[4*j] = values[j][base];
if (base+1 < numParameters)
data[4*j+1] = values[j][base+1];
if (base+2 < numParameters)
data[4*j+2] = values[j][base+2];
if (base+3 < numParameters)
data[4*j+3] = values[j][base+3];
}
context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 4;
}
else if (buffers[i].getSize() == 2*elementSize) {
vector<T> data(2*numObjects);
for (int j = 0; j < numObjects; j++) {
data[2*j] = values[j][base];
if (base+1 < numParameters)
data[2*j+1] = values[j][base+1];
}
context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 2;
}
else if (buffers[i].getSize() == elementSize) {
vector<T> data(numObjects);
for (int j = 0; j < numObjects; j++)
data[j] = values[j][base];
context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base++;
}
else
throw OpenMMException("Internal error: Unknown buffer type in OpenCLParameterSet");
}
}
catch (cl::Error err) {
stringstream str;
str<<"Error uploading parameter set "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
/**
 * Build the suffix that identifies one parameter within the set's buffers.
 *
 * The suffix is the 1-based number of the buffer holding the parameter, followed by
 * extraSuffix, followed by a component selector (".x", ".y", ".z" or ".w") when that
 * buffer holds more than one component per object.
 *
 * @param index        the index of the parameter
 * @param extraSuffix  text inserted between the buffer number and the component selector
 * @throws OpenMMException if index lies beyond the parameters held by the buffers
 */
string OpenCLParameterSet::getParameterSuffix(int index, const std::string& extraSuffix) const {
    const char* const componentSuffix[] = {".x", ".y", ".z", ".w"};

    // Walk the buffers, converting the global index into an index within its buffer.
    int owner = -1;
    for (int i = 0; i < (int) buffers.size(); i++) {
        if (index*elementSize < buffers[i].getSize()) {
            owner = i;
            break;
        }
        index -= buffers[i].getSize()/elementSize;
    }
    if (owner == -1)
        throw OpenMMException("Internal error: Illegal argument to OpenCLParameterSet::getParameterSuffix() ("+name+")");

    stringstream result;
    result << (owner+1) << extraSuffix;
    const bool multiComponent = (buffers[owner].getSize() != elementSize);
    if (multiComponent)
        result << componentSuffix[index];
    return result.str();
}
/**
 * Define template instantiations for float and double versions of getParameterValues() and setParameterValues().
 * These are the only two element types the parameter set chooses between (float or double).
 * NOTE(review): presumably the explicit instantiations are needed because the template bodies are
 * defined in this source file rather than in the header, and OPENMM_EXPORT_OPENCL makes them
 * visible to code that links against the library - confirm against the header and macro definition.
 */
namespace OpenMM {
template OPENMM_EXPORT_OPENCL void OpenCLParameterSet::getParameterValues<float>(vector<vector<float> >& values) const;
template OPENMM_EXPORT_OPENCL void OpenCLParameterSet::setParameterValues<float>(const vector<vector<float> >& values);
template OPENMM_EXPORT_OPENCL void OpenCLParameterSet::getParameterValues<double>(vector<vector<double> >& values) const;
template OPENMM_EXPORT_OPENCL void OpenCLParameterSet::setParameterValues<double>(const vector<vector<double> >& values);
}
\ No newline at end of file
......@@ -43,13 +43,13 @@
using namespace OpenMM;
using namespace std;
#ifdef OPENMM_OPENCL_BUILDING_STATIC_LIBRARY
#ifdef OPENMM_COMMON_BUILDING_STATIC_LIBRARY
// Entry point compiled in the static-library branch of the surrounding #ifdef.
// Registers a newly allocated OpenCLPlatform with Platform::registerPlatform(), but only
// when OpenCLPlatform::isPlatformSupported() reports true.
extern "C" void registerOpenCLPlatform() {
if (OpenCLPlatform::isPlatformSupported())
Platform::registerPlatform(new OpenCLPlatform());
}
#else
// Entry point compiled in the #else branch of the surrounding #ifdef.
// Registers a newly allocated OpenCLPlatform with Platform::registerPlatform(), but only
// when OpenCLPlatform::isPlatformSupported() reports true.
// NOTE(review): two signature lines follow that differ only in the export macro
// (OPENMM_EXPORT_OPENCL vs OPENMM_EXPORT_COMMON). This looks like the old and new sides of a
// diff shown without +/- markers; confirm which one belongs in the file.
extern "C" OPENMM_EXPORT_OPENCL void registerPlatforms() {
extern "C" OPENMM_EXPORT_COMMON void registerPlatforms() {
if (OpenCLPlatform::isPlatformSupported())
Platform::registerPlatform(new OpenCLPlatform());
}
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLProgram.h"
#include "OpenCLKernel.h"
using namespace OpenMM;
using namespace std;
/**
 * Wrap a cl::Program so that kernels can be created from it later.
 * Both arguments are simply stored in the members of the same name.
 */
OpenCLProgram::OpenCLProgram(OpenCLContext& context, cl::Program program)
        : context(context),
          program(program) {
}
/**
 * Create the kernel called `name` from the wrapped program.
 *
 * @param name  the name of the kernel function to look up in the program
 * @return a ComputeKernel built from a shared pointer to a new OpenCLKernel
 */
ComputeKernel OpenCLProgram::createKernel(const string& name) {
    cl::Kernel kernel(program, name.c_str());
    // Keep the pointer typed as OpenCLKernel* so the shared_ptr is constructed exactly
    // as it would be from the new-expression itself.
    OpenCLKernel* wrapped = new OpenCLKernel(context, kernel);
    return shared_ptr<ComputeKernelImpl>(wrapped);
}
\ No newline at end of file
/**
 * Apply the Andersen thermostat to adjust particle velocities.
 *
 * For every atom, the w component of random[randomIndex+atomGroups[index]] decides whether
 * the atom collides: a value strictly inside (-randomRange, randomRange) means "collide".
 * Atoms with the same atomGroups entry read the same selector and therefore collide together.
 * On a collision the x/y/z velocity is replaced by sqrt(kT*velm.w) times the atom's own random
 * vector random[randomIndex+index]; otherwise the velocity is written back unchanged.
 *
 * collisionFrequency  collision rate, multiplied by the step size stepSize[0].y
 * kT                  scales the magnitude of the reassigned velocities
 * velm                velocities; the w component is left untouched
 *                     (presumably the inverse mass - confirm against the caller)
 * random              pool of random float4 values, addressed from randomIndex
 * atomGroups          per-atom offset selecting which random value decides the collision
 */
__kernel void applyAndersenThermostat(float collisionFrequency, float kT, __global mixed4* velm, __global const mixed2* restrict stepSize, __global const float4* restrict random,
        unsigned int randomIndex, __global const int* restrict atomGroups) {
    float collisionProbability = (float) (1.0f-exp(-collisionFrequency*stepSize[0].y));
    // erf(x/sqrt(2)) is the probability that a standard normal variable lies within |value| < x,
    // so the constant that converts between the two scales is sqrt(2), not exp(2). Dividing by
    // exp(2) made the acceptance window several times too narrow.
    // NOTE(review): this assumes the random pool holds standard-normal samples - confirm.
    float randomRange = (float) erf(collisionProbability/sqrt(2.0f));
    for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
        mixed4 velocity = velm[index];
        float4 selectRand = random[randomIndex+atomGroups[index]];
        float4 velRand = random[randomIndex+index];
        // scale == 0 means "collide": drop the old velocity and take the random one instead.
        real scale = (selectRand.w > -randomRange && selectRand.w < randomRange ? 0 : 1);
        real add = (1-scale)*sqrt(kT*velocity.w);
        velocity.x = scale*velocity.x + add*velRand.x;
        velocity.y = scale*velocity.y + add*velRand.y;
        velocity.z = scale*velocity.z + add*velRand.z;
        velm[index] = velocity;
    }
}
// Indices of the values that integrateBAOABPart2 reads from its paramBuffer argument:
// VelScale (0) selects the velocity scale factor, NoiseScale (1) the noise scale factor.
enum {VelScale, NoiseScale};
/**
 * First stage of a BAOAB step: advance each velocity by half a step using the current
 * forces, then record the resulting half-step displacement.
 *
 * Atoms whose velm.w is zero are skipped entirely. For every other atom the updated
 * velocity is stored back to velm and halfdt*velocity is written to both posDelta and
 * oldDelta. The half step is taken from 0.5*dt[0].y.
 */
__kernel void integrateBAOABPart1(__global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta,
        __global mixed4* restrict oldDelta, __global const mixed2* restrict dt) {
    const mixed halfdt = 0.5*dt[0].y;
    int index = get_global_id(0);
    while (index < NUM_ATOMS) {
        mixed4 velocity = velm[index];
        if (velocity.w != 0.0) {
            // Same factor for all three components: half step times velm.w.
            const mixed kick = halfdt*velocity.w;
            velocity.x += kick*force[index].x;
            velocity.y += kick*force[index].y;
            velocity.z += kick*force[index].z;
            velm[index] = velocity;
            const mixed4 displacement = halfdt*velocity;
            posDelta[index] = displacement;
            oldDelta[index] = displacement;
        }
        index += get_global_size(0);
    }
}
/**
 * Second stage of a BAOAB step. For every atom with non-zero velm.w it
 *   1. adds (posDelta-oldDelta)/halfdt to the velocity (the change made to the displacement
 *      since it was recorded, e.g. by constraints),
 *   2. rescales the velocity by paramBuffer[VelScale] and adds random noise scaled by
 *      paramBuffer[NoiseScale]*sqrt(velm.w),
 *   3. moves the position by posDelta, and
 *   4. records a new half-step displacement in posDelta and oldDelta.
 *
 * The random stream advances together with the atom index, also for skipped atoms.
 * With USE_MIXED_PRECISION the position is the sum of posq and posqCorrection and is split
 * back into the two arrays after the update.
 */
__kernel void integrateBAOABPart2(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global mixed4* restrict posDelta,
        __global mixed4* restrict oldDelta, __global const mixed* restrict paramBuffer, __global const mixed2* restrict dt, __global const float4* restrict random, unsigned int randomIndex) {
    const mixed vscale = paramBuffer[VelScale];
    const mixed noisescale = paramBuffer[NoiseScale];
    const mixed halfdt = 0.5*dt[0].y;
    const mixed invHalfdt = 1/halfdt;
    int index = get_global_id(0);
    randomIndex += index;
    for (; index < NUM_ATOMS; index += get_global_size(0), randomIndex += get_global_size(0)) {
        mixed4 velocity = velm[index];
        if (velocity.w == 0.0)
            continue;
        mixed4 delta = posDelta[index];
        const mixed4 previous = oldDelta[index];
        const mixed sqrtInvMass = SQRT(velocity.w);

        // Fold the externally applied change of the displacement into the velocity.
        velocity.x += (delta.x-previous.x)*invHalfdt;
        velocity.y += (delta.y-previous.y)*invHalfdt;
        velocity.z += (delta.z-previous.z)*invHalfdt;

        // Scale the velocity and add the random kick.
        const float4 noise = random[randomIndex];
        const mixed noiseAmplitude = noisescale*sqrtInvMass;
        velocity.x = vscale*velocity.x + noiseAmplitude*noise.x;
        velocity.y = vscale*velocity.y + noiseAmplitude*noise.y;
        velocity.z = vscale*velocity.z + noiseAmplitude*noise.z;
        velm[index] = velocity;

        // Advance the position by the recorded displacement.
#ifdef USE_MIXED_PRECISION
        const real4 coarse = posq[index];
        const real4 fine = posqCorrection[index];
        mixed4 pos = (mixed4) (coarse.x+(mixed)fine.x, coarse.y+(mixed)fine.y, coarse.z+(mixed)fine.z, coarse.w);
#else
        real4 pos = posq[index];
#endif
        pos.x += delta.x;
        pos.y += delta.y;
        pos.z += delta.z;
#ifdef USE_MIXED_PRECISION
        posq[index] = convert_real4(pos);
        posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
        posq[index] = pos;
#endif

        // Record the next half-step displacement.
        delta = halfdt*velocity;
        posDelta[index] = delta;
        oldDelta[index] = delta;
    }
}
/**
 * Third stage of a BAOAB step. For every atom with non-zero velm.w the velocity receives
 * (posDelta-oldDelta)/halfdt, i.e. the change made to the displacement since it was recorded,
 * and the position is advanced by posDelta.
 *
 * With USE_MIXED_PRECISION the position is the sum of posq and posqCorrection and is split
 * back into the two arrays after the update.
 */
__kernel void integrateBAOABPart3(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm,
        __global mixed4* restrict posDelta, __global mixed4* restrict oldDelta, __global const mixed2* restrict dt) {
    const mixed halfdt = 0.5*dt[0].y;
    const mixed invHalfdt = 1/halfdt;
    int index = get_global_id(0);
    while (index < NUM_ATOMS) {
        mixed4 velocity = velm[index];
        if (velocity.w != 0.0) {
            const mixed4 delta = posDelta[index];
            const mixed4 previous = oldDelta[index];
            velocity.x += (delta.x-previous.x)*invHalfdt;
            velocity.y += (delta.y-previous.y)*invHalfdt;
            velocity.z += (delta.z-previous.z)*invHalfdt;
            velm[index] = velocity;
#ifdef USE_MIXED_PRECISION
            const real4 coarse = posq[index];
            const real4 fine = posqCorrection[index];
            mixed4 pos = (mixed4) (coarse.x+(mixed)fine.x, coarse.y+(mixed)fine.y, coarse.z+(mixed)fine.z, coarse.w);
#else
            real4 pos = posq[index];
#endif
            pos.x += delta.x;
            pos.y += delta.y;
            pos.z += delta.z;
#ifdef USE_MIXED_PRECISION
            posq[index] = convert_real4(pos);
            posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
            posq[index] = pos;
#endif
        }
        index += get_global_size(0);
    }
}
/**
 * Fourth stage of a BAOAB step: a velocity half step.
 * Every atom with non-zero velm.w has halfdt*velm.w*force added to its velocity,
 * where halfdt is 0.5*dt[0].y. Other atoms are left untouched.
 */
__kernel void integrateBAOABPart4(__global mixed4* restrict velm, __global const real4* restrict force, __global const mixed2* restrict dt) {
    const mixed halfdt = 0.5*dt[0].y;
    int index = get_global_id(0);
    while (index < NUM_ATOMS) {
        mixed4 velocity = velm[index];
        if (velocity.w != 0.0) {
            const mixed kick = halfdt*velocity.w;
            velocity.x += kick*force[index].x;
            velocity.y += kick*force[index].y;
            velocity.z += kick*force[index].z;
            velm[index] = velocity;
        }
        index += get_global_size(0);
    }
}
/**
 * First step of Brownian integration: compute the displacement of every atom.
 *
 * For atoms whose inverse mass (velm.w) is non-zero, posDelta receives
 * tauDeltaT*invMass*force + noiseAmplitude*sqrt(invMass)*random, with w set to 0.
 * The random stream advances together with the atom index, also for skipped atoms.
 */
__kernel void integrateBrownianPart1(mixed tauDeltaT, mixed noiseAmplitude, __global const real4* restrict force,
        __global mixed4* restrict posDelta, __global const mixed4* restrict velm, __global const float4* restrict random, unsigned int randomIndex) {
    randomIndex += get_global_id(0);
    for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0), randomIndex += get_global_size(0)) {
        const mixed invMass = velm[index].w;
        if (invMass == 0)
            continue;
        const mixed drift = tauDeltaT*invMass;
        const mixed kick = noiseAmplitude*sqrt(invMass);
        const real4 f = force[index];
        const float4 noise = random[randomIndex];
        posDelta[index] = (mixed4) (drift*f.x + kick*noise.x,
                                    drift*f.y + kick*noise.y,
                                    drift*f.z + kick*noise.z, 0);
    }
}
/**
 * Second step of Brownian integration: apply the displacement.
 *
 * For atoms with non-zero velm.w the x/y/z velocity is set to oneOverDeltaT*posDelta and
 * the position is moved by posDelta. With USE_MIXED_PRECISION the position is the sum of
 * posq and posqCorrection and is split back into the two arrays after the update.
 */
__kernel void integrateBrownianPart2(mixed oneOverDeltaT, __global real4* posq, __global real4* posqCorrection, __global mixed4* velm, __global const mixed4* restrict posDelta) {
    int index = get_global_id(0);
    while (index < NUM_ATOMS) {
        if (velm[index].w != 0) {
            const mixed4 step = posDelta[index];
            // Only x/y/z are written; the w component of velm stays as it is.
            velm[index].x = oneOverDeltaT*step.x;
            velm[index].y = oneOverDeltaT*step.y;
            velm[index].z = oneOverDeltaT*step.z;
#ifdef USE_MIXED_PRECISION
            const real4 coarse = posq[index];
            const real4 fine = posqCorrection[index];
            mixed4 pos = (mixed4) (coarse.x+(mixed)fine.x, coarse.y+(mixed)fine.y, coarse.z+(mixed)fine.z, coarse.w);
#else
            real4 pos = posq[index];
#endif
            pos.x += step.x;
            pos.y += step.y;
            pos.z += step.z;
#ifdef USE_MIXED_PRECISION
            posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
            posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
            posq[index] = pos;
#endif
        }
        index += get_global_size(0);
    }
}
/**
 * Load the position stored at `index`.
 * With USE_MIXED_PRECISION the x/y/z components are the sum of posq and posqCorrection
 * (evaluated in mixed precision) and w is taken from posq; otherwise posq[index] is
 * returned as is.
 */
mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
    const real4 stored = posq[index];
#ifdef USE_MIXED_PRECISION
    const real4 correction = posqCorrection[index];
    return (mixed4) (stored.x+(mixed)correction.x, stored.y+(mixed)correction.y, stored.z+(mixed)correction.z, stored.w);
#else
    return stored;
#endif
}
/**
 * Compute the direction each constraint points in. Per the original note, this runs once
 * at the beginning of constraint evaluation.
 *
 * For every constraint the x/y/z components of constraintDistance are overwritten with the
 * difference between the positions of its two atoms; the w component is preserved.
 * Work-item 0 additionally resets the flags to converged[0] = 1 and converged[1] = 0.
 */
__kernel void computeConstraintDirections(__global const int2* restrict constraintAtoms, __global mixed4* restrict constraintDistance,
        __global const real4* restrict atomPositions, __global const real4* restrict posCorrection, __global int* restrict converged) {
    int index = get_global_id(0);
    while (index < NUM_CONSTRAINTS) {
        const int2 atoms = constraintAtoms[index];
        // Start from the stored entry so that its w component is carried over untouched.
        mixed4 dir = constraintDistance[index];
        const mixed4 first = loadPos(atomPositions, posCorrection, atoms.x);
        const mixed4 second = loadPos(atomPositions, posCorrection, atoms.y);
        dir.x = first.x-second.x;
        dir.y = first.y-second.y;
        dir.z = first.z-second.z;
        constraintDistance[index] = dir;
        index += get_global_size(0);
    }
    if (get_global_id(0) == 0) {
        converged[0] = 1;
        converged[1] = 0;
    }
}
/**
 * Compute the force applied by each constraint.
 *
 * Writes one value per constraint to delta1 and clears converged[iteration%2] as soon as
 * any constraint is found to be outside the tolerance. If the flag of the other parity,
 * converged[1-iteration%2], is already set, no forces are computed: work-item 0 sets
 * converged[iteration%2] and hostConvergedFlag[0] and every work-item returns.
 *
 * With CONSTRAIN_VELOCITIES defined, atomPositions is used directly as the difference vector
 * and convergence means |delta1| <= tol. Otherwise the constraint direction is added to the
 * difference first and convergence means the squared length lies within
 * [(1-tol)^2, (1+tol)^2] times dir.w squared.
 */
__kernel void computeConstraintForce(__global const int2* restrict constraintAtoms, __global const mixed4* restrict constraintDistance, __global const mixed4* restrict atomPositions,
__global const mixed* restrict reducedMass, __global mixed* restrict delta1, __global int* restrict converged, __global int* restrict hostConvergedFlag, mixed tol, int iteration) {
// Per-work-group flag; used below to skip further convergence checks once one has failed.
__local int groupConverged;
if (converged[1-iteration%2]) {
if (get_global_id(0) == 0) {
converged[iteration%2] = 1;
hostConvergedFlag[0] = 1;
}
return; // The constraint iteration has already converged.
}
// One work-item per group initializes the flag; the barrier makes it visible to the rest.
if (get_local_id(0) == 0)
groupConverged = 1;
barrier(CLK_LOCAL_MEM_FENCE);
// (1-tol)^2 and (1+tol)^2, expanded.
mixed lowerTol = 1-2*tol+tol*tol;
mixed upperTol = 1+2*tol+tol*tol;
for (int index = get_global_id(0); index < NUM_CONSTRAINTS; index += get_global_size(0)) {
// Compute the force due to this constraint.
int2 atoms = constraintAtoms[index];
mixed4 dir = constraintDistance[index];
mixed4 rp_ij = atomPositions[atoms.x]-atomPositions[atoms.y];
#ifndef CONSTRAIN_VELOCITIES
rp_ij.xyz += dir.xyz;
#endif
// Projection of rp_ij onto dir, and the squared length of dir.
mixed rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
mixed d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
#ifdef CONSTRAIN_VELOCITIES
delta1[index] = -2*reducedMass[index]*rrpr/d_ij2;
// See whether it has converged.
// NOTE(review): groupConverged is written here without synchronization; every writer
// stores 0, so this looks intentional - confirm.
if (groupConverged && fabs(delta1[index]) > tol) {
groupConverged = 0;
converged[iteration%2] = 0;
}
#else
mixed rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z;
mixed dist2 = dir.w*dir.w;
mixed diff = dist2 - rp2;
// The force is set to zero when the projection is not clearly positive, which also avoids
// dividing by a tiny or negative rrpr.
delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f);
// See whether it has converged.
// NOTE(review): groupConverged is written here without synchronization; every writer
// stores 0, so this looks intentional - confirm.
if (groupConverged && (rp2 < lowerTol*dist2 || rp2 > upperTol*dist2)) {
groupConverged = 0;
converged[iteration%2] = 0;
}
#endif
}
}
/**
 * Multiply the vector of constraint forces (delta1) by the constraint matrix and store the
 * product in delta2. Does nothing when converged[iteration%2] is already set.
 *
 * The matrix is stored sparsely: the entries of row `index` sit at index, index+NUM_CONSTRAINTS,
 * index+2*NUM_CONSTRAINTS, ... and a column number >= NUM_CONSTRAINTS terminates the row.
 */
__kernel void multiplyByConstraintMatrix(__global const mixed* restrict delta1, __global mixed* restrict delta2, __global const int* restrict constraintMatrixColumn,
        __global const mixed* restrict constraintMatrixValue, __global const int* restrict converged, int iteration) {
    if (converged[iteration%2])
        return; // The constraint iteration has already converged.
    for (int row = get_global_id(0); row < NUM_CONSTRAINTS; row += get_global_size(0)) {
        mixed total = 0;
        int element = row;
        int column = constraintMatrixColumn[element];
        while (column < NUM_CONSTRAINTS) {
            total += delta1[column]*constraintMatrixValue[element];
            element += NUM_CONSTRAINTS;
            column = constraintMatrixColumn[element];
        }
        delta2[row] = total;
    }
}
/**
 * Update the atom positions based on the constraint forces in delta2.
 *
 * Work-item 0 first sets the flag of the other parity, converged[1-iteration%2]. If the flag
 * for this iteration is already set, nothing else happens. Otherwise every atom is moved
 * along the direction of each of its constraints by damping*invMass*delta2, where damping
 * is 0.5 for the first two iterations and 1 afterwards.
 *
 * Entries of atomConstraints encode both the constraint and the sign: a positive value v
 * refers to constraint v-1 with a positive contribution, any other value v refers to
 * constraint -v-1 with a negated contribution.
 */
__kernel void updateAtomPositions(__global const int* restrict numAtomConstraints, __global const int* restrict atomConstraints, __global const mixed4* restrict constraintDistance,
        __global mixed4* restrict atomPositions, __global const mixed4* restrict velm, __global const mixed* restrict delta1, __global const mixed* restrict delta2, __global int* restrict converged, int iteration) {
    if (get_global_id(0) == 0)
        converged[1-iteration%2] = 1;
    if (converged[iteration%2])
        return; // The constraint iteration has already converged.
    const mixed damping = (iteration < 2 ? 0.5f : 1.0f);
    int index = get_global_id(0);
    while (index < NUM_ATOMS) {
        mixed4 atomPos = atomPositions[index];
        const mixed invMass = velm[index].w;
        const mixed weight = damping*invMass;
        const int num = numAtomConstraints[index];
        for (int i = 0; i < num; i++) {
            const int encoded = atomConstraints[index+i*NUM_ATOMS];
            const bool forward = (encoded > 0);
            const int constraint = (forward ? encoded : -encoded)-1;
            mixed constraintForce = weight*delta2[constraint];
            if (!forward)
                constraintForce = -constraintForce;
            const mixed4 dir = constraintDistance[constraint];
            atomPos.x += constraintForce*dir.x;
            atomPos.y += constraintForce*dir.y;
            atomPos.z += constraintForce*dir.z;
        }
        atomPositions[index] = atomPos;
        index += get_global_size(0);
    }
}
// Code fragment, not a complete function: it reads pos1..pos4, which are not declared here.
const real PI = 3.14159265358979323846f;
// Compute the first angle.
// v0a, v1a and v2a are the difference vectors pos1-pos2, pos3-pos2 and pos3-pos4.
real4 v0a = (real4) (pos1.xyz-pos2.xyz, 0.0f);
real4 v1a = (real4) (pos3.xyz-pos2.xyz, 0.0f);
real4 v2a = (real4) (pos3.xyz-pos4.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0a)
APPLY_PERIODIC_TO_DELTA(v1a)
APPLY_PERIODIC_TO_DELTA(v2a)
#endif
// The angle is measured between the two cross products cp0a and cp1a.
real4 cp0a = cross(v0a, v1a);
real4 cp1a = cross(v1a, v2a);
real cosangle = dot(normalize(cp0a), normalize(cp1a));
real angleA;
if (cosangle > 0.99f || cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real4 cross_prod = cross(cp0a, cp1a);
real scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
angleA = asin(SQRT(dot(cross_prod, cross_prod)/scale));
// asin() only covers the first quadrant here; mirror the result when the cosine is negative.
if (cosangle < 0.0f)
angleA = PI-angleA;
}
else
angleA = acos(cosangle);
// Give the angle a sign, then shift it by 2*PI and wrap with fmod so the result is non-negative.
angleA = (dot(v0a, cp1a) >= 0 ? angleA : -angleA);
angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
// Compute the second angle.
// Same procedure as for the first angle, applied to pos5..pos8. This part reuses the
// variables PI and cosangle declared earlier in the fragment.
real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f);
real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f);
real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0b)
APPLY_PERIODIC_TO_DELTA(v1b)
APPLY_PERIODIC_TO_DELTA(v2b)
#endif
// The angle is measured between the two cross products cp0b and cp1b.
real4 cp0b = cross(v0b, v1b);
real4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
real angleB;
if (cosangle > 0.99f || cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real4 cross_prod = cross(cp0b, cp1b);
real scale = dot(cp0b, cp0b)*dot(cp1b, cp1b);
angleB = asin(SQRT(dot(cross_prod, cross_prod)/scale));
// asin() only covers the first quadrant here; mirror the result when the cosine is negative.
if (cosangle < 0.0f)
angleB = PI-angleB;
}
else
angleB = acos(cosangle);
// Give the angle a sign, then shift it by 2*PI and wrap with fmod so the result is non-negative.
angleB = (dot(v0b, cp1b) >= 0 ? angleB : -angleB);
angleB = fmod(angleB+2.0f*PI, 2.0f*PI);
// Identify which patch this is in.
// MAP_POS[MAPS[index]] describes the map used by this torsion pair: its x component is used
// as the offset of the map's coefficients in COEFF and its y component as the grid size.
int2 pos = MAP_POS[MAPS[index]];
int size = pos.y;
real delta = 2*PI/size;
// s and t are the integer grid cell of angleA and angleB.
// NOTE(review): if angleA/delta or angleB/delta ever evaluates to exactly `size`, s or t
// would address one cell past the grid - confirm the angles are always strictly below 2*PI.
int s = (int) (angleA/delta);
int t = (int) (angleB/delta);
// Each patch has four consecutive float4 coefficients.
float4 c[4];
int coeffIndex = pos.x+4*(s+size*t);
c[0] = COEFF[coeffIndex];
c[1] = COEFF[coeffIndex+1];
c[2] = COEFF[coeffIndex+2];
c[3] = COEFF[coeffIndex+3];
// Fractional position inside the patch along each angle.
real da = angleA/delta-s;
real db = angleB/delta-t;
// Evaluate the spline to determine the energy and gradients.
// The energy is built up Horner-style in da: each of the four steps multiplies the running
// value by da and adds a cubic in db whose coefficients are c[i].x .. c[i].w, for i = 3 down to 0.
// dEdA and dEdB are accumulated alongside it in the same nested fashion.
real torsionEnergy = 0.0f;
real dEdA = 0.0f;
real dEdB = 0.0f;
torsionEnergy = da*torsionEnergy + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;
dEdA = db*dEdA + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;
dEdB = da*dEdB + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;
torsionEnergy = da*torsionEnergy + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;
dEdA = db*dEdA + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;
dEdB = da*dEdB + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;
torsionEnergy = da*torsionEnergy + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;
dEdA = db*dEdA + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;
dEdB = da*dEdB + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;
torsionEnergy = da*torsionEnergy + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;
dEdA = db*dEdA + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;
dEdB = da*dEdB + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;
// The derivatives above are with respect to the fractional coordinates; dividing by the
// cell width converts them to derivatives with respect to the angles.
dEdA /= delta;
dEdB /= delta;
energy += torsionEnergy;
// Apply the force to the first torsion.
// force1..force4 are derived from dEdA and the vectors of the first torsion. By construction
// force2 = d-force1 and force3 = -d-force4, so the four forces sum to zero.
real normCross1 = dot(cp0a, cp0a);
real normSqrBC = dot(v1a, v1a);
real normBC = SQRT(normSqrBC);
real normCross2 = dot(cp1a, cp1a);
real dp = 1.0f/normSqrBC;
// ff packs four scalar factors: x and w scale the two cross products, y and z are the
// projections of v0a and v2a onto v1a divided by the squared length of v1a.
real4 ff = (real4) ((-dEdA*normBC)/normCross1, dot(v0a, v1a)*dp, dot(v2a, v1a)*dp, (dEdA*normBC)/normCross2);
real4 force1 = ff.x*cp0a;
real4 force4 = ff.w*cp1a;
real4 d = ff.y*force1 - ff.z*force4;
real4 force2 = d-force1;
real4 force3 = -d-force4;
// Apply the force to the second torsion.
// Same computation with dEdB and the vectors of the second torsion; the scratch variables
// declared for the first torsion are reused. The results are force5..force8.
normCross1 = dot(cp0b, cp0b);
normSqrBC = dot(v1b, v1b);
normBC = SQRT(normSqrBC);
normCross2 = dot(cp1b, cp1b);
dp = 1.0f/normSqrBC;
ff = (real4) ((-dEdB*normBC)/normCross1, dot(v0b, v1b)*dp, dot(v2b, v1b)*dp, (dEdB*normBC)/normCross2);
real4 force5 = ff.x*cp0b;
real4 force8 = ff.w*cp1b;
d = ff.y*force5 - ff.z*force8;
real4 force6 = d-force5;
real4 force7 = -d-force8;
/**
 * This file contains OpenCL definitions for the macros and functions needed for the
 * common compute framework.
 *
 * It maps the framework's portable spellings (KERNEL, GLOBAL, LOCAL_ID, SYNC_THREADS,
 * make_float4(), sqrtf(), ...) onto their OpenCL C equivalents.
 */
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif

// Function and address-space qualifiers. DEVICE expands to nothing.
#define KERNEL __kernel
#define DEVICE
#define LOCAL __local
#define LOCAL_ARG __local
#define GLOBAL __global
#define RESTRICT restrict

// Work-item and work-group indexing along dimension 0.
#define LOCAL_ID get_local_id(0)
#define LOCAL_SIZE get_local_size(0)
#define GLOBAL_ID get_global_id(0)
#define GLOBAL_SIZE get_global_size(0)
#define GROUP_ID get_group_id(0)
#define NUM_GROUPS get_num_groups(0)

// Synchronization. Both macros already end in a semicolon.
#define SYNC_THREADS barrier(CLK_LOCAL_MEM_FENCE+CLK_GLOBAL_MEM_FENCE);
#define MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE+CLK_GLOBAL_MEM_FENCE);
#define ATOMIC_ADD(dest, value) atom_add(dest, value)

typedef long mm_long;
typedef unsigned long mm_ulong;

// Vector constructors spelled as functions, implemented with OpenCL vector literals.
#define make_short2(x...) ((short2) (x))
#define make_short3(x...) ((short3) (x))
#define make_short4(x...) ((short4) (x))
#define make_int2(x...) ((int2) (x))
#define make_int3(x...) ((int3) (x))
#define make_int4(x...) ((int4) (x))
#define make_float2(x...) ((float2) (x))
#define make_float3(x...) ((float3) (x))
#define make_float4(x...) ((float4) (x))
#define make_double2(x...) ((double2) (x))
#define make_double3(x...) ((double3) (x))
#define make_double4(x...) ((double4) (x))

#define trimTo3(v) (v).xyz

// OpenCL has overloaded versions of standard math functions for single and double
// precision arguments. CUDA has separate functions. To allow them to be called
// consistently, we define the "single precision" functions to just be synonyms
// for the standard ones.
#define sqrtf(x) sqrt(x)
#define rsqrtf(x) rsqrt(x)
#define expf(x) exp(x)
#define logf(x) log(x)
// pow() is a two-argument function, so the synonym has to forward both arguments.
#define powf(x, y) pow(x, y)
#define cosf(x) cos(x)
#define sinf(x) sin(x)
#define tanf(x) tan(x)
#define acosf(x) acos(x)
#define asinf(x) asin(x)
#define atanf(x) atan(x)
#define atan2f(x, y) atan2(x, y)
/**
 * Return vec1-vec2 with the squared length of that difference stored in the w component.
 * When `periodic` is true, APPLY_PERIODIC_TO_DELTA is applied to the difference first.
 * The periodic box arguments are not referenced directly in this function; presumably the
 * macro uses them.
 */
real4 ccb_delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
    real4 result;
    result.x = vec1.x-vec2.x;
    result.y = vec1.y-vec2.y;
    result.z = vec1.z-vec2.z;
    result.w = 0;
    if (periodic)
        APPLY_PERIODIC_TO_DELTA(result);
    const real squaredLength = result.x*result.x + result.y*result.y + result.z*result.z;
    result.w = squaredLength;
    return result;
}
/**
 * Return the angle between two vectors. The w component of each vector must hold that
 * vector's squared magnitude.
 *
 * When the cosine is within 0.01 of +1 or -1, the angle is taken from the cross product
 * with asin() instead of from acos().
 */
real ccb_computeAngle(real4 vec1, real4 vec2) {
    const real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
    const real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
    if (!(cosine > 0.99f || cosine < -0.99f))
        return acos(cosine);

    // We're close to the singularity in acos(), so take the cross product and use asin() instead.
    const real4 crossProduct = cross(vec1, vec2);
    const real scale = vec1.w*vec2.w;
    const real acuteAngle = asin(SQRT(dot(crossProduct, crossProduct)/scale));
    if (cosine < 0)
        return M_PI-acuteAngle;
    return acuteAngle;
}
/**
 * Return the cross product of two vectors, with the squared magnitude of the product
 * stored in the w component.
 */
real4 ccb_computeCross(real4 vec1, real4 vec2) {
    real4 product = cross(vec1, vec2);
    const real squaredLength = product.x*product.x + product.y*product.y + product.z*product.z;
    product.w = squaredLength;
    return product;
}
// Code fragment: COMPUTE_FORCE is a placeholder that is defined elsewhere. It presumably
// expands to code that defines dEdX, dEdY and dEdZ - confirm against the code that builds
// this source.
COMPUTE_FORCE
// The force is the negative of the three derivatives; the w component is unused (0).
real4 force1 = (real4) (-dEdX, -dEdY, -dEdZ, 0);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment