Commit c8dac206 authored by Peter Eastman
Browse files

Continuing to implement double precision in OpenCL

parent 34938e2c
......@@ -58,7 +58,7 @@ void OpenCLBondedUtilities::addInteraction(const vector<vector<int> >& atoms, co
/**
 * Register a memory object that will be passed to the bonded force kernel as an extra argument.
 *
 * @param data  the memory object to pass; its address is stored, so it is not copied here
 * @param type  the element type of the argument, recorded in parallel with the memory object
 * @return the argument's name, "customArg" followed by the number of arguments
 *         registered so far (so the first argument is "customArg1")
 */
std::string OpenCLBondedUtilities::addArgument(cl::Memory& data, const string& type) {
arguments.push_back(&data);
argTypes.push_back(type);
// The size is read after the push_back, which is what makes the suffix 1-based.
// NOTE(review): two consecutive return statements follow (this looks like the old and new
// lines of a diff rendered together); only the first one is reachable.
return "customArg"+OpenCLExpressionUtilities::intToString(arguments.size());
return "customArg"+context.intToString(arguments.size());
}
void OpenCLBondedUtilities::addPrefixCode(const string& source) {
......@@ -164,17 +164,17 @@ void OpenCLBondedUtilities::initialize(const System& system) {
stringstream s;
for (int i = 0; i < (int) prefixCode.size(); i++)
s<<prefixCode[i];
s<<"__kernel void computeBondedForces(__global float4* restrict forceBuffers, __global float* restrict energyBuffer, __global const float4* restrict posq, int groups";
s<<"__kernel void computeBondedForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, int groups";
for (int i = 0; i < setSize; i++) {
int force = set[i];
string indexType = "uint"+(indexWidth[force] == 1 ? "" : OpenCLExpressionUtilities::intToString(indexWidth[force]));
string indexType = "uint"+(indexWidth[force] == 1 ? "" : context.intToString(indexWidth[force]));
s<<", __global const "<<indexType<<"* restrict atomIndices"<<i;
s<<", __global const "<<indexType<<"* restrict bufferIndices"<<i;
}
for (int i = 0; i < (int) arguments.size(); i++)
s<<", __global "<<argTypes[i]<<"* customArg"<<(i+1);
s<<") {\n";
s<<"float energy = 0.0f;\n";
s<<"real energy = 0.0f;\n";
for (int i = 0; i < setSize; i++) {
int force = set[i];
s<<createForceSource(i, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
......@@ -182,7 +182,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
s<<"energyBuffer[get_global_id(0)] += energy;\n";
s<<"}\n";
map<string, string> defines;
defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms());
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
cl::Program program = context.createProgram(s.str(), defines);
kernels.push_back(cl::Kernel(program, "computeBondedForces"));
}
......@@ -206,7 +206,7 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
suffix = suffix4;
else
suffix = suffix16;
string indexType = "uint"+(width == 1 ? "" : OpenCLExpressionUtilities::intToString(width));
string indexType = "uint"+(width == 1 ? "" : context.intToString(width));
stringstream s;
s<<"if ((groups&"<<(1<<group)<<") != 0)\n";
s<<"for (unsigned int index = get_global_id(0); index < "<<numBonds<<"; index += get_global_size(0)) {\n";
......@@ -214,13 +214,13 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
s<<" "<<indexType<<" buffers = bufferIndices"<<forceIndex<<"[index];\n";
for (int i = 0; i < numAtoms; i++) {
s<<" unsigned int atom"<<(i+1)<<" = atoms"<<suffix[i]<<";\n";
s<<" float4 pos"<<(i+1)<<" = posq[atom"<<(i+1)<<"];\n";
s<<" real4 pos"<<(i+1)<<" = posq[atom"<<(i+1)<<"];\n";
}
s<<computeForce<<"\n";
for (int i = 0; i < numAtoms; i++) {
s<<" {\n";
s<<" unsigned int offset = atom"<<(i+1)<<"+buffers"<<suffix[i]<<"*PADDED_NUM_ATOMS;\n";
s<<" float4 force = forceBuffers[offset];\n";
s<<" real4 force = forceBuffers[offset];\n";
s<<" force.xyz += force"<<(i+1)<<".xyz;\n";
s<<" forceBuffers[offset] = force;\n";
s<<" }\n";
......
......@@ -68,7 +68,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL),
posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
bonded(NULL), nonbonded(NULL), thread(NULL) {
expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
if (precision == "single") {
useDoublePrecision = false;
useMixedPrecision = false;
......@@ -145,7 +145,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
this->deviceIndex = deviceIndex;
if (device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() < minThreadBlockSize)
throw OpenMMException("The specified OpenCL device is not compatible with OpenMM");
compilationDefines["WORK_GROUP_SIZE"] = OpenCLExpressionUtilities::intToString(ThreadBlockSize);
compilationDefines["WORK_GROUP_SIZE"] = intToString(ThreadBlockSize);
if (platformVendor.size() >= 5 && platformVendor.substr(0, 5) == "Intel")
defaultOptimizationOptions = "";
else
......@@ -269,7 +269,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
clearFourBuffersKernel = cl::Kernel(utilities, "clearFourBuffers");
clearFiveBuffersKernel = cl::Kernel(utilities, "clearFiveBuffers");
clearSixBuffersKernel = cl::Kernel(utilities, "clearSixBuffers");
reduceFloat4Kernel = cl::Kernel(utilities, "reduceFloat4Buffer");
reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer");
reduceForcesKernel = cl::Kernel(utilities, "reduceForces");
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
......@@ -316,9 +316,10 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
thread = new WorkThread();
// Create the integration utilities object.
// Create utilities objects.
integration = new OpenCLIntegrationUtilities(*this, system);
expression = new OpenCLExpressionUtilities(*this);
}
OpenCLContext::~OpenCLContext() {
......@@ -346,6 +347,8 @@ OpenCLContext::~OpenCLContext() {
delete atomIndexDevice;
if (integration != NULL)
delete integration;
if (expression != NULL)
delete expression;
if (bonded != NULL)
delete bonded;
if (nonbonded != NULL)
......@@ -376,10 +379,10 @@ void OpenCLContext::initialize() {
reduceForcesKernel.setArg<cl::Buffer>(1, forceBuffers->getDeviceBuffer());
reduceForcesKernel.setArg<cl_int>(2, paddedNumAtoms);
reduceForcesKernel.setArg<cl_int>(3, numForceBuffers);
addAutoclearBuffer(longForceBuffer->getDeviceBuffer(), longForceBuffer->getSize()*2);
addAutoclearBuffer(*longForceBuffer);
}
addAutoclearBuffer(forceBuffers->getDeviceBuffer(), forceBuffers->getSize()*4);
addAutoclearBuffer(energyBuffer->getDeviceBuffer(), energyBuffer->getSize());
addAutoclearBuffer(*forceBuffers);
addAutoclearBuffer(*energyBuffer);
int bufferBytes = max(posq->getSize()*posq->getElementSize(), energyBuffer->getSize()*energyBuffer->getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = queue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
......@@ -479,6 +482,21 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
return program;
}
/**
 * Convert a number to a string in a format suitable for including in a kernel.
 *
 * The value is written in scientific notation.  When the context does not use double
 * precision, 8 digits follow the decimal point and an "f" suffix is appended; otherwise
 * 16 digits are written and no suffix is added.
 *
 * @param value  the number to convert
 * @return the textual literal
 */
string OpenCLContext::doubleToString(double value) {
    stringstream s;
    // Kernel source must always use '.' as the decimal separator.  A stringstream picks up
    // the global C++ locale, which an application may have changed, so pin the classic one.
    s.imbue(std::locale::classic());
    s.precision(useDoublePrecision ? 16 : 8);
    s << scientific << value;
    if (!useDoublePrecision)
        s << "f";
    return s.str();
}
/**
 * Convert an integer to a string in a format suitable for including in a kernel.
 *
 * @param value  the number to convert
 * @return the decimal representation of value
 */
string OpenCLContext::intToString(int value) {
    stringstream s;
    // Pin the classic locale so that a global locale with digit grouping can never insert
    // thousands separators (e.g. "1,024") into generated source or preprocessor defines.
    s.imbue(std::locale::classic());
    s << value;
    return s.str();
}
void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSize) {
if (blockSize == -1)
blockSize = ThreadBlockSize;
......@@ -494,18 +512,23 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
}
/**
 * Set all elements of an array to 0 by forwarding to clearBuffer(cl::Memory&, int).
 *
 * @param array  the array whose device buffer should be cleared
 */
void OpenCLContext::clearBuffer(OpenCLArray& array) {
// NOTE(review): two calls appear here.  The first passes the array's size as a count of
// cl_float-sized elements, the second passes its size in bytes.  This looks like the old
// and new lines of a diff rendered together; confirm which one is intended.
clearBuffer(array.getDeviceBuffer(), array.getSize()*array.getElementSize()/sizeof(cl_float));
clearBuffer(array.getDeviceBuffer(), array.getSize()*array.getElementSize());
}
/**
 * Set the contents of a memory object to 0 by running clearBufferKernel on it.
 *
 * @param memory  the Memory to clear
 * @param size    the size value supplied by the caller; it is divided by 4 to obtain "words"
 */
void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
// Integer division: any remainder of size modulo 4 is dropped.
int words = size/4;
clearBufferKernel.setArg<cl::Memory>(0, memory);
// NOTE(review): the kernel is configured and launched twice, first with "size" and then
// with "words".  This looks like the old and new lines of a diff rendered together;
// confirm which pair is intended.
clearBufferKernel.setArg<cl_int>(1, size);
executeKernel(clearBufferKernel, size, 128);
clearBufferKernel.setArg<cl_int>(1, words);
executeKernel(clearBufferKernel, words, 128);
}
/**
 * Register an array for automatic clearing by forwarding its device buffer, together with
 * the product of its element count and element size, to the (cl::Memory&, int) overload.
 *
 * @param array  the array to register
 */
void OpenCLContext::addAutoclearBuffer(OpenCLArray& array) {
    const int sizeInBytes = array.getSize()*array.getElementSize();
    addAutoclearBuffer(array.getDeviceBuffer(), sizeInBytes);
}
/**
 * Register a memory object for automatic clearing.
 *
 * @param memory  the Memory to clear; its address is stored in autoclearBuffers
 * @param size    the size value supplied by the caller
 */
void OpenCLContext::addAutoclearBuffer(cl::Memory& memory, int size) {
autoclearBuffers.push_back(&memory);
// NOTE(review): two values (size and size/4) are appended to autoclearBufferSizes for the
// single entry appended to autoclearBuffers, which leaves the two vectors with different
// lengths.  This looks like the old and new lines of a diff rendered together; confirm
// which one is intended.
autoclearBufferSizes.push_back(size);
autoclearBufferSizes.push_back(size/4);
}
void OpenCLContext::clearAutoclearBuffers() {
......@@ -581,10 +604,10 @@ void OpenCLContext::reduceForces() {
/**
 * Run a reduction kernel over a set of buffers packed into a single array.
 *
 * @param array       the array containing the packed buffers
 * @param numBuffers  the number of buffers packed into the array
 */
void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
// Size of one packed buffer (integer division of the total element count).
int bufferSize = array.getSize()/numBuffers;
// NOTE(review): the same argument setup and launch appears twice, once for
// reduceFloat4Kernel and once for reduceReal4Kernel.  This looks like the old and new
// lines of a diff rendered together; confirm which kernel is intended.
reduceFloat4Kernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
reduceFloat4Kernel.setArg<cl_int>(1, bufferSize);
reduceFloat4Kernel.setArg<cl_int>(2, numBuffers);
executeKernel(reduceFloat4Kernel, bufferSize, 128);
reduceReal4Kernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
reduceReal4Kernel.setArg<cl_int>(1, bufferSize);
reduceReal4Kernel.setArg<cl_int>(2, numBuffers);
executeKernel(reduceReal4Kernel, bufferSize, 128);
}
void OpenCLContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMolecule, vector<vector<int> >& atomBonds) {
......
......@@ -45,6 +45,7 @@ namespace OpenMM {
class OpenCLArray;
class OpenCLForceInfo;
class OpenCLIntegrationUtilities;
class OpenCLExpressionUtilities;
class OpenCLBondedUtilities;
class OpenCLNonbondedUtilities;
class System;
......@@ -314,14 +315,18 @@ public:
* Set all elements of an array to 0.
*
* @param memory the Memory to clear
* @param size the number of float elements in the buffer
* @param size the size of the buffer in bytes
*/
void clearBuffer(cl::Memory& memory, int size);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*/
void addAutoclearBuffer(OpenCLArray& array);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
* @param memory the Memory to clear
* @param size the number of float elements in the buffer
* @param size the size of the buffer in bytes
*/
void addAutoclearBuffer(cl::Memory& memory, int size);
/**
......@@ -329,7 +334,7 @@ public:
*/
void clearAutoclearBuffers();
/**
* Given a collection of buffers packed into an array, sum them and store
* Given a collection of floating point buffers packed into an array, sum them and store
* the sum in the first buffer.
*
* @param array the array containing the buffers to reduce
......@@ -437,6 +442,15 @@ public:
bool getUseMixedPrecision() {
// Plain accessor for the useMixedPrecision flag.
return useMixedPrecision;
}
/**
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
*/
std::string doubleToString(double value);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
std::string intToString(int value);
/**
* Get the size of the periodic box.
*/
......@@ -476,6 +490,12 @@ public:
OpenCLIntegrationUtilities& getIntegrationUtilities() {
// Dereferences the "integration" pointer member; no null check is performed here.
return *integration;
}
/**
* Get the OpenCLExpressionUtilities for this context.
*
* @return a reference obtained by dereferencing the "expression" pointer member;
*         no null check is performed here
*/
OpenCLExpressionUtilities& getExpressionUtilities() {
return *expression;
}
/**
* Get the OpenCLBondedUtilities for this context.
*/
......@@ -580,7 +600,7 @@ private:
cl::Kernel clearFourBuffersKernel;
cl::Kernel clearFiveBuffersKernel;
cl::Kernel clearSixBuffersKernel;
cl::Kernel reduceFloat4Kernel;
cl::Kernel reduceReal4Kernel;
cl::Kernel reduceForcesKernel;
std::vector<OpenCLForceInfo*> forces;
std::vector<Molecule> molecules;
......@@ -601,6 +621,7 @@ private:
std::vector<int> autoclearBufferSizes;
std::vector<ReorderListener*> reorderListeners;
OpenCLIntegrationUtilities* integration;
OpenCLExpressionUtilities* expression;
OpenCLBondedUtilities* bonded;
OpenCLNonbondedUtilities* nonbonded;
WorkThread* thread;
......
......@@ -33,19 +33,6 @@ using namespace OpenMM;
using namespace Lepton;
using namespace std;
/**
 * Convert a number to a string in a format suitable for including in a kernel.
 *
 * The value is written in scientific notation with 8 digits after the decimal point,
 * followed by an "f" suffix.
 *
 * @param value  the number to convert
 * @return the textual literal
 */
string OpenCLExpressionUtilities::doubleToString(double value) {
    stringstream s;
    // Kernel source must always use '.' as the decimal separator.  A stringstream picks up
    // the global C++ locale, which an application may have changed, so pin the classic one.
    s.imbue(std::locale::classic());
    s.precision(8);
    s << scientific << value << "f";
    return s.str();
}
/**
 * Convert an integer to a string in a format suitable for including in a kernel.
 *
 * @param value  the number to convert
 * @return the decimal representation of value
 */
string OpenCLExpressionUtilities::intToString(int value) {
    stringstream s;
    // Pin the classic locale so that a global locale with digit grouping can never insert
    // thousands separators (e.g. "1,024") into generated source.
    s.imbue(std::locale::classic());
    s << value;
    return s.str();
}
string OpenCLExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
const vector<pair<string, string> >& functions, const string& prefix, const string& functionParams, const string& tempType) {
vector<pair<ExpressionTreeNode, string> > variableNodes;
......@@ -75,13 +62,13 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
return;
for (int i = 0; i < (int) node.getChildren().size(); i++)
processExpression(out, node.getChildren()[i], temps, functions, prefix, functionParams, allExpressions, tempType);
string name = prefix+intToString(temps.size());
string name = prefix+context.intToString(temps.size());
bool hasRecordedNode = false;
out << tempType << " " << name << " = ";
switch (node.getOperation().getId()) {
case Operation::CONSTANT:
out << doubleToString(dynamic_cast<const Operation::Constant*>(&node.getOperation())->getValue());
out << context.doubleToString(dynamic_cast<const Operation::Constant*>(&node.getOperation())->getValue());
break;
case Operation::VARIABLE:
throw OpenMMException("Unknown variable in expression: "+node.getOperation().getName());
......@@ -107,7 +94,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
string valueName = name;
string derivName = name;
if (valueNode != NULL && derivNode != NULL) {
string name2 = prefix+intToString(temps.size());
string name2 = prefix+context.intToString(temps.size());
out << tempType << " " << name2 << " = 0.0f;\n";
if (isDeriv) {
valueName = name2;
......@@ -236,10 +223,10 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << "RECIP(" << getTempName(node.getChildren()[0], temps) << ")";
break;
case Operation::ADD_CONSTANT:
out << doubleToString(dynamic_cast<const Operation::AddConstant*>(&node.getOperation())->getValue()) << "+" << getTempName(node.getChildren()[0], temps);
out << context.doubleToString(dynamic_cast<const Operation::AddConstant*>(&node.getOperation())->getValue()) << "+" << getTempName(node.getChildren()[0], temps);
break;
case Operation::MULTIPLY_CONSTANT:
out << doubleToString(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()) << "*" << getTempName(node.getChildren()[0], temps);
out << context.doubleToString(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()) << "*" << getTempName(node.getChildren()[0], temps);
break;
case Operation::POWER_CONSTANT:
{
......@@ -266,7 +253,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
for (map<int, const ExpressionTreeNode*>::const_iterator iter = powers.begin(); iter != powers.end(); ++iter) {
if (iter->first != exponent) {
exponents.push_back(iter->first >= 0 ? iter->first : -iter->first);
string name2 = prefix+intToString(temps.size());
string name2 = prefix+context.intToString(temps.size());
names.push_back(name2);
temps.push_back(make_pair(*iter->second, name2));
out << tempType << " " << name2 << " = 0.0f;\n";
......@@ -295,7 +282,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << "}";
}
else
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << doubleToString(exponent) << ")";
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << context.doubleToString(exponent) << ")";
break;
}
case Operation::MIN:
......
......@@ -45,6 +45,8 @@ namespace OpenMM {
class OPENMM_EXPORT OpenCLExpressionUtilities {
public:
/**
 * Create an OpenCLExpressionUtilities bound to a context.
 *
 * @param context  the context this object works with; it is stored by reference, not copied
 */
OpenCLExpressionUtilities(OpenCLContext& context) : context(context) {
}
/**
* Generate the source code for calculating a set of expressions.
*
......@@ -54,10 +56,10 @@ public:
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float")
* @param tempType the type of value to use for temporary variables (defaults to "real")
*/
static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="float");
std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="real");
/**
* Generate the source code for calculating a set of expressions.
*
......@@ -69,7 +71,7 @@ public:
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float")
*/
static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables,
std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="float");
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
......@@ -79,26 +81,19 @@ public:
* @param max the value of the independent variable corresponding to the last element of values
* @return the spline coefficients
*/
static std::vector<mm_float4> computeFunctionCoefficients(const std::vector<double>& values, double min, double max);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static std::string doubleToString(double value);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static std::string intToString(int value);
std::vector<mm_float4> computeFunctionCoefficients(const std::vector<double>& values, double min, double max);
class FunctionPlaceholder;
private:
static void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams,
const std::vector<Lepton::ParsedExpression>& allExpressions, const std::string& tempType);
static std::string getTempName(const Lepton::ExpressionTreeNode& node, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps);
static void findRelatedTabulatedFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
std::string getTempName(const Lepton::ExpressionTreeNode& node, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps);
void findRelatedTabulatedFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
const Lepton::ExpressionTreeNode*& valueNode, const Lepton::ExpressionTreeNode*& derivNode);
static void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
std::map<int, const Lepton::ExpressionTreeNode*>& powers);
OpenCLContext& context;
};
/**
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2011 Stanford University and the Authors. *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -47,15 +47,15 @@ void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
maxSize = 1;
zkernel.setArg<cl::Buffer>(0, in.getDeviceBuffer());
zkernel.setArg<cl::Buffer>(1, out.getDeviceBuffer());
zkernel.setArg<cl_float>(2, forward ? 1.0f : -1.0f);
zkernel.setArg<cl_int>(2, forward ? 1 : -1);
context.executeKernel(zkernel, xsize*ysize*zsize, min(zsize, (int) maxSize));
xkernel.setArg<cl::Buffer>(0, out.getDeviceBuffer());
xkernel.setArg<cl::Buffer>(1, in.getDeviceBuffer());
xkernel.setArg<cl_float>(2, forward ? 1.0f : -1.0f);
xkernel.setArg<cl_int>(2, forward ? 1 : -1);
context.executeKernel(xkernel, xsize*ysize*zsize, min(xsize, (int) maxSize));
ykernel.setArg<cl::Buffer>(0, in.getDeviceBuffer());
ykernel.setArg<cl::Buffer>(1, out.getDeviceBuffer());
ykernel.setArg<cl_float>(2, forward ? 1.0f : -1.0f);
ykernel.setArg<cl_int>(2, forward ? 1 : -1);
context.executeKernel(ykernel, xsize*ysize*zsize, min(ysize, (int) maxSize));
}
......@@ -99,23 +99,23 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source<<"int i = get_local_id(0);\n";
}
source<<"int j = i/"<<m<<";\n";
source<<"float2 c0 = data"<<input<<"[i];\n";
source<<"float2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float2 c3 = data"<<input<<"[i+"<<(3*L*m)<<"];\n";
source<<"float2 c4 = data"<<input<<"[i+"<<(4*L*m)<<"];\n";
source<<"float2 d0 = c1+c4;\n";
source<<"float2 d1 = c2+c3;\n";
source<<"float2 d2 = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c1-c4);\n";
source<<"float2 d3 = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c2-c3);\n";
source<<"float2 d4 = d0+d1;\n";
source<<"float2 d5 = "<<OpenCLExpressionUtilities::doubleToString(0.25*sqrt(5.0))<<"*(d0-d1);\n";
source<<"float2 d6 = c0-0.25f*d4;\n";
source<<"float2 d7 = d6+d5;\n";
source<<"float2 d8 = d6-d5;\n";
string coeff = OpenCLExpressionUtilities::doubleToString(sin(0.2*M_PI)/sin(0.4*M_PI));
source<<"float2 d9 = sign*(float2) (d2.y+"<<coeff<<"*d3.y, -d2.x-"<<coeff<<"*d3.x);\n";
source<<"float2 d10 = sign*(float2) ("<<coeff<<"*d2.y-d3.y, d3.x-"<<coeff<<"*d2.x);\n";
source<<"real2 c0 = data"<<input<<"[i];\n";
source<<"real2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"real2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"real2 c3 = data"<<input<<"[i+"<<(3*L*m)<<"];\n";
source<<"real2 c4 = data"<<input<<"[i+"<<(4*L*m)<<"];\n";
source<<"real2 d0 = c1+c4;\n";
source<<"real2 d1 = c2+c3;\n";
source<<"real2 d2 = "<<context.doubleToString(sin(0.4*M_PI))<<"*(c1-c4);\n";
source<<"real2 d3 = "<<context.doubleToString(sin(0.4*M_PI))<<"*(c2-c3);\n";
source<<"real2 d4 = d0+d1;\n";
source<<"real2 d5 = "<<context.doubleToString(0.25*sqrt(5.0))<<"*(d0-d1);\n";
source<<"real2 d6 = c0-0.25f*d4;\n";
source<<"real2 d7 = d6+d5;\n";
source<<"real2 d8 = d6-d5;\n";
string coeff = context.doubleToString(sin(0.2*M_PI)/sin(0.4*M_PI));
source<<"real2 d9 = sign*(real2) (d2.y+"<<coeff<<"*d3.y, -d2.x-"<<coeff<<"*d3.x);\n";
source<<"real2 d10 = sign*(real2) ("<<coeff<<"*d2.y-d3.y, d3.x-"<<coeff<<"*d2.x);\n";
source<<"data"<<output<<"[i+4*j*"<<m<<"] = c0+d4;\n";
source<<"data"<<output<<"[i+(4*j+1)*"<<m<<"] = multiplyComplex(w[j*"<<zsize<<"/"<<(5*L)<<"], d7+d9);\n";
source<<"data"<<output<<"[i+(4*j+2)*"<<m<<"] = multiplyComplex(w[j*"<<(2*zsize)<<"/"<<(5*L)<<"], d8+d10);\n";
......@@ -134,14 +134,14 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source<<"int i = get_local_id(0);\n";
}
source<<"int j = i/"<<m<<";\n";
source<<"float2 c0 = data"<<input<<"[i];\n";
source<<"float2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float2 c3 = data"<<input<<"[i+"<<(3*L*m)<<"];\n";
source<<"float2 d0 = c0+c2;\n";
source<<"float2 d1 = c0-c2;\n";
source<<"float2 d2 = c1+c3;\n";
source<<"float2 d3 = sign*(float2) (c1.y-c3.y, c3.x-c1.x);\n";
source<<"real2 c0 = data"<<input<<"[i];\n";
source<<"real2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"real2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"real2 c3 = data"<<input<<"[i+"<<(3*L*m)<<"];\n";
source<<"real2 d0 = c0+c2;\n";
source<<"real2 d1 = c0-c2;\n";
source<<"real2 d2 = c1+c3;\n";
source<<"real2 d3 = sign*(real2) (c1.y-c3.y, c3.x-c1.x);\n";
source<<"data"<<output<<"[i+3*j*"<<m<<"] = d0+d2;\n";
source<<"data"<<output<<"[i+(3*j+1)*"<<m<<"] = multiplyComplex(w[j*"<<zsize<<"/"<<(4*L)<<"], d1+d3);\n";
source<<"data"<<output<<"[i+(3*j+2)*"<<m<<"] = multiplyComplex(w[j*"<<(2*zsize)<<"/"<<(4*L)<<"], d0-d2);\n";
......@@ -159,12 +159,12 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source<<"int i = get_local_id(0);\n";
}
source<<"int j = i/"<<m<<";\n";
source<<"float2 c0 = data"<<input<<"[i];\n";
source<<"float2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"float2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"float2 d0 = c1+c2;\n";
source<<"float2 d1 = c0-0.5f*d0;\n";
source<<"float2 d2 = sign*"<<OpenCLExpressionUtilities::doubleToString(sin(M_PI/3.0))<<"*(float2) (c1.y-c2.y, c2.x-c1.x);\n";
source<<"real2 c0 = data"<<input<<"[i];\n";
source<<"real2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"real2 c2 = data"<<input<<"[i+"<<(2*L*m)<<"];\n";
source<<"real2 d0 = c1+c2;\n";
source<<"real2 d1 = c0-0.5f*d0;\n";
source<<"real2 d2 = sign*"<<context.doubleToString(sin(M_PI/3.0))<<"*(real2) (c1.y-c2.y, c2.x-c1.x);\n";
source<<"data"<<output<<"[i+2*j*"<<m<<"] = c0+d0;\n";
source<<"data"<<output<<"[i+(2*j+1)*"<<m<<"] = multiplyComplex(w[j*"<<zsize<<"/"<<(3*L)<<"], d1+d2);\n";
source<<"data"<<output<<"[i+(2*j+2)*"<<m<<"] = multiplyComplex(w[j*"<<(2*zsize)<<"/"<<(3*L)<<"], d1-d2);\n";
......@@ -181,15 +181,15 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source<<"int i = get_local_id(0);\n";
}
source<<"int j = i/"<<m<<";\n";
source<<"float2 c0 = data"<<input<<"[i];\n";
source<<"float2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"real2 c0 = data"<<input<<"[i];\n";
source<<"real2 c1 = data"<<input<<"[i+"<<(L*m)<<"];\n";
source<<"data"<<output<<"[i+j*"<<m<<"] = c0+c1;\n";
source<<"data"<<output<<"[i+(j+1)*"<<m<<"] = multiplyComplex(w[j*"<<zsize<<"/"<<(2*L)<<"], c0-c1);\n";
source<<"}\n";
m = m*2;
}
else
throw OpenMMException("Illegal size for FFT: "+OpenCLExpressionUtilities::intToString(zsize));
throw OpenMMException("Illegal size for FFT: "+context.intToString(zsize));
source<<"barrier(CLK_LOCAL_MEM_FENCE);\n";
source<<"}\n";
++stage;
......@@ -205,16 +205,17 @@ cl::Kernel OpenCLFFT3D::createKernel(int xsize, int ysize, int zsize) {
source<<"out[y*(ZSIZE*XSIZE)+get_local_id(0)*XSIZE+x] = data"<<(stage%2)<<"[get_local_id(0)];\n";
source<<"barrier(CLK_GLOBAL_MEM_FENCE);";
map<string, string> replacements;
replacements["XSIZE"] = OpenCLExpressionUtilities::intToString(xsize);
replacements["YSIZE"] = OpenCLExpressionUtilities::intToString(ysize);
replacements["ZSIZE"] = OpenCLExpressionUtilities::intToString(zsize);
replacements["M_PI"] = OpenCLExpressionUtilities::doubleToString(M_PI);
replacements["XSIZE"] = context.intToString(xsize);
replacements["YSIZE"] = context.intToString(ysize);
replacements["ZSIZE"] = context.intToString(zsize);
replacements["M_PI"] = context.doubleToString(M_PI);
replacements["COMPUTE_FFT"] = source.str();
replacements["LOOP_REQUIRED"] = (loopRequired ? "1" : "0");
cl::Program program = context.createProgram(context.replaceStrings(OpenCLKernelSources::fft, replacements));
cl::Kernel kernel(program, "execFFT");
kernel.setArg(3, zsize*sizeof(mm_float2), NULL);
kernel.setArg(4, zsize*sizeof(mm_float2), NULL);
kernel.setArg(5, zsize*sizeof(mm_float2), NULL);
int bufferSize = zsize*(context.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
kernel.setArg(3, bufferSize, NULL);
kernel.setArg(4, bufferSize, NULL);
kernel.setArg(5, bufferSize, NULL);
return kernel;
}
......@@ -559,8 +559,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Create the CCMA kernels.
map<string, string> defines;
defines["NUM_CONSTRAINTS"] = OpenCLExpressionUtilities::intToString(numCCMA);
defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(numAtoms);
defines["NUM_CONSTRAINTS"] = context.intToString(numCCMA);
defines["NUM_ATOMS"] = context.intToString(numAtoms);
cl::Program ccmaProgram = context.createProgram(OpenCLKernelSources::ccma, defines);
ccmaDirectionsKernel = cl::Kernel(ccmaProgram, "computeConstraintDirections");
ccmaPosForceKernel = cl::Kernel(ccmaProgram, "computeConstraintForce");
......@@ -630,9 +630,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Create the kernels for virtual sites.
map<string, string> defines;
defines["NUM_2_AVERAGE"] = OpenCLExpressionUtilities::intToString(num2Avg);
defines["NUM_3_AVERAGE"] = OpenCLExpressionUtilities::intToString(num3Avg);
defines["NUM_OUT_OF_PLANE"] = OpenCLExpressionUtilities::intToString(numOutOfPlane);
defines["NUM_2_AVERAGE"] = context.intToString(num2Avg);
defines["NUM_3_AVERAGE"] = context.intToString(num3Avg);
defines["NUM_OUT_OF_PLANE"] = context.intToString(numOutOfPlane);
cl::Program vsiteProgram = context.createProgram(OpenCLKernelSources::virtualSites, defines);
vsitePositionKernel = cl::Kernel(vsiteProgram, "computeVirtualSites");
vsitePositionKernel.setArg<cl::Buffer>(0, context.getPosq().getDeviceBuffer());
......
......@@ -53,19 +53,6 @@ using namespace std;
using Lepton::ExpressionTreeNode;
using Lepton::Operation;
/**
 * Convert a number to a single precision literal suitable for including in a kernel.
 *
 * The value is written in scientific notation with 8 digits after the decimal point,
 * followed by an "f" suffix (for example 1.0 becomes "1.00000000e+00f").
 *
 * @param value  the number to convert
 * @return the textual literal
 */
static string doubleToString(double value) {
    stringstream s;
    // Kernel source must always use '.' as the decimal separator.  A stringstream picks up
    // the global C++ locale, which an application may have changed, so pin the classic one.
    s.imbue(std::locale::classic());
    s.precision(8);
    s << scientific << value << "f";
    return s.str();
}
/**
 * Convert an integer to a string in a format suitable for including in a kernel.
 *
 * @param value  the number to convert
 * @return the decimal representation of value, with no digit grouping
 */
static string intToString(int value) {
    stringstream s;
    // Pin the classic locale so that a global locale with digit grouping can never insert
    // thousands separators (e.g. "1,024") into generated source or preprocessor defines.
    s.imbue(std::locale::classic());
    s << value;
    return s.str();
}
static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseMixedPrecision())
kernel.setArg<cl::Buffer>(index, cl.getPosqCorrection().getDeviceBuffer());
......@@ -73,6 +60,20 @@ static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int inde
kernel.setArg<void*>(index, NULL);
}
/**
 * Set a kernel argument to the periodic box size, using the vector type that matches the
 * context's precision: mm_double4 when double precision is in use, mm_float4 otherwise.
 *
 * @param cl      the context to query
 * @param kernel  the kernel whose argument is set
 * @param index   the index of the argument to set
 */
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
        return;
    }
    kernel.setArg<mm_double4>(index, cl.getPeriodicBoxSizeDouble());
}
/**
 * Set a kernel argument to the inverse periodic box size, using the vector type that
 * matches the context's precision: mm_double4 when double precision is in use,
 * mm_float4 otherwise.
 *
 * @param cl      the context to query
 * @param kernel  the kernel whose argument is set
 * @param index   the index of the argument to set
 */
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
        return;
    }
    kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
}
static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
const Lepton::Operation& op = expression.getRootNode().getOperation();
if (op.getId() != Lepton::Operation::CONSTANT)
......@@ -124,11 +125,19 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
double sum = 0.0f;
if (includeEnergy) {
OpenCLArray& energyArray = cl.getEnergyBuffer();
cl_float* energy = (cl_float*) cl.getPinnedBuffer();
if (cl.getUseDoublePrecision()) {
double* energy = (double*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
else {
float* energy = (float*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
}
return sum;
}
......@@ -401,7 +410,7 @@ void OpenCLApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(OpenCLKernelSources::constraints, defines);
applyDeltasKernel = cl::Kernel(program, "applyPositionDeltas");
applyDeltasKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
......@@ -571,7 +580,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map<string, Lepton::ParsedExpression> expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdR = "] = forceExpression;
expressions["real dEdR = "] = forceExpression;
// Create the kernels.
......@@ -587,7 +596,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
variables[name] = value;
}
}
......@@ -598,7 +607,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
compute<<buffer.getType()<<" bondParams"<<(i+1)<<" = "<<argName<<"[index];\n";
}
vector<pair<string, string> > functions;
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
compute << cl.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map<string, string> replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::bondForce, replacements), force.getForceGroup());
......@@ -796,7 +805,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map<string, Lepton::ParsedExpression> expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
expressions["real dEdAngle = "] = forceExpression;
// Create the kernels.
......@@ -812,7 +821,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
variables[name] = value;
}
}
......@@ -823,7 +832,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
compute<<buffer.getType()<<" angleParams"<<(i+1)<<" = "<<argName<<"[index];\n";
}
vector<pair<string, string> > functions;
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
compute << cl.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map<string, string> replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::angleForce, replacements), force.getForceGroup());
......@@ -1194,7 +1203,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map<string, Lepton::ParsedExpression> expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
expressions["real dEdAngle = "] = forceExpression;
// Create the kernels.
......@@ -1210,7 +1219,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
variables[name] = value;
}
}
......@@ -1221,7 +1230,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
compute<<buffer.getType()<<" torsionParams"<<(i+1)<<" = "<<argName<<"[index];\n";
}
vector<pair<string, string> > functions;
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
compute << cl.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map<string, string> replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
......@@ -1349,7 +1358,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int numParticles = force.getNumParticles();
sigmaEpsilon = OpenCLArray::create<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
vector<mm_float4> posq(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float4> posqf(cl.getPaddedNumAtoms());
vector<mm_double4> posqd(cl.getPaddedNumAtoms());
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms());
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
......@@ -1358,7 +1368,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
posq[i].w = (float) charge;
if (cl.getUseDoublePrecision())
posqd[i] = mm_double4(0, 0, 0, charge);
else
posqf[i] = mm_float4(0, 0, 0, (float) charge);
sigmaEpsilonVector[i] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
......@@ -1371,7 +1384,10 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first);
}
cl.getPosq().upload(posq);
if (cl.getUseDoublePrecision())
cl.getPosq().upload(posqd);
else
cl.getPosq().upload(posqf);
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
......@@ -1383,8 +1399,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC);
defines["REACTION_FIELD_K"] = cl.doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = cl.doubleToString(reactionFieldC);
}
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
......@@ -1396,23 +1412,24 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
// Create the reciprocal space kernels.
map<string, string> replacements;
replacements["NUM_ATOMS"] = intToString(numParticles);
replacements["KMAX_X"] = intToString(kmaxx);
replacements["KMAX_Y"] = intToString(kmaxy);
replacements["KMAX_Z"] = intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = doubleToString(-1.0/(4.0*alpha*alpha));
replacements["NUM_ATOMS"] = cl.intToString(numParticles);
replacements["KMAX_X"] = cl.intToString(kmaxx);
replacements["KMAX_Y"] = cl.intToString(kmaxy);
replacements["KMAX_Z"] = cl.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cl.doubleToString(-1.0/(4.0*alpha*alpha));
cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
cosSinSums = OpenCLArray::create<mm_float2>(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), "cosSinSums");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters.
......@@ -1422,30 +1439,31 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
gridSizeY = OpenCLFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = OpenCLFFT3D::findLegalDimension(gridSizeZ);
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
pmeDefines["PME_ORDER"] = intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = intToString(numParticles);
pmeDefines["RECIP_EXP_FACTOR"] = doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["PME_ORDER"] = cl.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cl.intToString(numParticles);
pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = cl.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = cl.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cl.intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = cl.doubleToString(sqrt(ONE_4PI_EPS0));
// Create required data structures.
pmeGrid = OpenCLArray::create<mm_float2>(cl, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid");
cl.addAutoclearBuffer(pmeGrid->getDeviceBuffer(), pmeGrid->getSize()*2);
pmeGrid2 = OpenCLArray::create<mm_float2>(cl, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid2");
pmeBsplineModuliX = OpenCLArray::create<cl_float>(cl, gridSizeX, "pmeBsplineModuliX");
pmeBsplineModuliY = OpenCLArray::create<cl_float>(cl, gridSizeY, "pmeBsplineModuliY");
pmeBsplineModuliZ = OpenCLArray::create<cl_float>(cl, gridSizeZ, "pmeBsplineModuliZ");
pmeBsplineTheta = OpenCLArray::create<mm_float4>(cl, PmeOrder*numParticles, "pmeBsplineTheta");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
pmeGrid = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
cl.addAutoclearBuffer(*pmeGrid);
pmeGrid2 = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid2");
pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (deviceIsCpu)
pmeBsplineDTheta = OpenCLArray::create<mm_float4>(cl, PmeOrder*numParticles, "pmeBsplineDTheta");
pmeBsplineDTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineDTheta");
pmeAtomRange = OpenCLArray::create<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
sort = new OpenCLSort<SortTrait>(cl, cl.getNumAtoms());
......@@ -1487,7 +1505,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
vector<cl_float> moduli(ndata);
vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
......@@ -1503,6 +1521,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
}
if (cl.getUseDoublePrecision()) {
if (dim == 0)
pmeBsplineModuliX->upload(moduli);
else if (dim == 1)
......@@ -1510,6 +1529,18 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
pmeBsplineModuliZ->upload(moduli);
}
else {
vector<float> modulif(ndata);
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
pmeBsplineModuliX->upload(modulif);
else if (dim == 1)
pmeBsplineModuliY->upload(modulif);
else
pmeBsplineModuliZ->upload(modulif);
}
}
}
else
ewaldSelfEnergy = 0.0;
......@@ -1568,9 +1599,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeSpreadChargeKernel = cl::Kernel(program, "gridSpreadCharge");
pmeConvolutionKernel = cl::Kernel(program, "reciprocalConvolution");
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta->getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*sizeof(mm_float4), NULL);
pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(3, pmeAtomGridIndex->getDeviceBuffer());
if (deviceIsCpu)
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(6, pmeBsplineDTheta->getDeviceBuffer());
......@@ -1591,7 +1623,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeConvolutionKernel.setArg<cl::Buffer>(2, pmeBsplineModuliX->getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(3, pmeBsplineModuliY->getDeviceBuffer());
pmeConvolutionKernel.setArg<cl::Buffer>(4, pmeBsplineModuliZ->getDeviceBuffer());
interpolateForceThreads = (cl.getDevice().getInfo<CL_DEVICE_LOCAL_MEM_SIZE>() > 2*128*PmeOrder*sizeof(mm_float4) ? 128 : 64);
interpolateForceThreads = (cl.getDevice().getInfo<CL_DEVICE_LOCAL_MEM_SIZE>() > 2*128*PmeOrder*elementSize ? 128 : 64);
pmeInterpolateForceKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
......@@ -1600,7 +1632,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeInterpolateForceKernel.setArg<cl::Buffer>(6, pmeBsplineDTheta->getDeviceBuffer());
}
else
pmeInterpolateForceKernel.setArg(5, 2*interpolateForceThreads*PmeOrder*sizeof(mm_float4), NULL);
pmeInterpolateForceKernel.setArg(5, 2*interpolateForceThreads*PmeOrder*elementSize, NULL);
if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg<cl::Buffer>(0, pmeGrid->getDeviceBuffer());
......@@ -1608,57 +1640,68 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
}
if (cosSinSums != NULL && cl.getContextIndex() == 0 && includeReciprocal) {
mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_float4 recipBoxSize = mm_float4((float) (2*M_PI/boxSize.x), (float) (2*M_PI/boxSize.y), (float) (2*M_PI/boxSize.z), 0);
float recipCoefficient = (float) (ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z));
ewaldSumsKernel.setArg<mm_float4>(3, recipBoxSize);
ewaldSumsKernel.setArg<cl_float>(4, recipCoefficient);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 recipBoxSize = mm_double4(2*M_PI/boxSize.x, 2*M_PI/boxSize.y, 2*M_PI/boxSize.z, 0.0);
double recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
if (cl.getUseDoublePrecision()) {
ewaldSumsKernel.setArg<mm_double4>(3, recipBoxSize);
ewaldSumsKernel.setArg<cl_double>(4, recipCoefficient);
ewaldForcesKernel.setArg<mm_double4>(3, recipBoxSize);
ewaldForcesKernel.setArg<cl_double>(4, recipCoefficient);
}
else {
ewaldSumsKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldSumsKernel.setArg<cl_float>(4, (cl_float) recipCoefficient);
ewaldForcesKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldForcesKernel.setArg<cl_float>(4, (cl_float) recipCoefficient);
}
cl.executeKernel(ewaldSumsKernel, cosSinSums->getSize());
ewaldForcesKernel.setArg<mm_float4>(3, recipBoxSize);
ewaldForcesKernel.setArg<cl_float>(4, recipCoefficient);
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
}
if (pmeGrid != NULL && cl.getContextIndex() == 0 && includeReciprocal) {
mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_float4 invBoxSize = cl.getInvPeriodicBoxSize();
pmeUpdateBsplinesKernel.setArg<mm_float4>(4, boxSize);
pmeUpdateBsplinesKernel.setArg<mm_float4>(5, invBoxSize);
setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4);
setInvPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 5);
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu) {
pmeSpreadChargeKernel.setArg<mm_float4>(5, boxSize);
pmeSpreadChargeKernel.setArg<mm_float4>(6, invBoxSize);
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
setInvPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 6);
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
sort->sort(*pmeAtomGridIndex);
pmeAtomRangeKernel.setArg<mm_float4>(3, boxSize);
pmeAtomRangeKernel.setArg<mm_float4>(4, invBoxSize);
setPeriodicBoxSizeArg(cl, pmeAtomRangeKernel, 3);
setInvPeriodicBoxSizeArg(cl, pmeAtomRangeKernel, 4);
cl.executeKernel(pmeAtomRangeKernel, cl.getNumAtoms());
if (cl.getSupports64BitGlobalAtomics()) {
pmeSpreadChargeKernel.setArg<mm_float4>(5, boxSize);
pmeSpreadChargeKernel.setArg<mm_float4>(6, invBoxSize);
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
setInvPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 6);
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms(), PmeOrder*PmeOrder*PmeOrder);
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
}
else {
pmeZIndexKernel.setArg<mm_float4>(2, boxSize);
pmeZIndexKernel.setArg<mm_float4>(3, invBoxSize);
setPeriodicBoxSizeArg(cl, pmeZIndexKernel, 2);
setInvPeriodicBoxSizeArg(cl, pmeZIndexKernel, 3);
cl.executeKernel(pmeZIndexKernel, cl.getNumAtoms());
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
}
}
fft->execFFT(*pmeGrid, *pmeGrid2, true);
pmeConvolutionKernel.setArg<mm_float4>(5, invBoxSize);
pmeConvolutionKernel.setArg<cl_float>(6, (float) (1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z)));
setInvPeriodicBoxSizeArg(cl, pmeConvolutionKernel, 5);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
double scaleFactor = 1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z);
if (cl.getUseDoublePrecision())
pmeConvolutionKernel.setArg<cl_double>(6, scaleFactor);
else
pmeConvolutionKernel.setArg<cl_float>(6, (float) scaleFactor);
cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms());
fft->execFFT(*pmeGrid2, *pmeGrid, false);
pmeInterpolateForceKernel.setArg<mm_float4>(3, boxSize);
pmeInterpolateForceKernel.setArg<mm_float4>(4, invBoxSize);
setPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 3);
setInvPeriodicBoxSizeArg(cl, pmeInterpolateForceKernel, 4);
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms(), interpolateForceThreads);
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (dispersionCoefficient != 0.0 && includeDirect) {
mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
}
return energy;
......@@ -1697,8 +1740,9 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Record the per-particle parameters.
OpenCLArray& posq = cl.getPosq();
posq.download((mm_float4*) cl.getPinnedBuffer());
posq.download(cl.getPinnedBuffer());
mm_float4* posqf = (mm_float4*) cl.getPinnedBuffer();
mm_double4* posqd = (mm_double4*) cl.getPinnedBuffer();
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms());
double sumSquaredCharges = 0.0;
const vector<cl_int>& order = cl.getAtomIndex();
......@@ -1706,6 +1750,9 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
int index = order[i];
double charge, sigma, epsilon;
force.getParticleParameters(index, charge, sigma, epsilon);
if (cl.getUseDoublePrecision())
posqd[i].w = charge;
else
posqf[i].w = (float) charge;
sigmaEpsilonVector[index] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
sumSquaredCharges += charge*charge;
......@@ -1782,7 +1829,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+intToString(forceIndex)+"_";
string prefix = "custom"+cl.intToString(forceIndex)+"_";
// Record parameters and exclusions.
......@@ -1819,11 +1866,11 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
vector<double> values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
string arrayName = prefix+"table"+intToString(i);
string arrayName = prefix+"table"+cl.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
vector<mm_float4> f = cl.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(OpenCLArray::create<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
......@@ -1866,18 +1913,18 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
string value = "globals["+cl.intToString(i)+"]";
variables.push_back(makeVariable(name, prefix+value));
}
stringstream compute;
compute << OpenCLExpressionUtilities::createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
compute << cl.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
map<string, string> replacements;
replacements["COMPUTE_FORCE"] = compute.str();
string source = cl.replaceStrings(OpenCLKernelSources::customNonbonded, replacements);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+cl.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
......@@ -1965,14 +2012,14 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
longBornSum = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum");
longBornForce = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce");
bornForce = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms(), "bornForce");
cl.addAutoclearBuffer(longBornSum->getDeviceBuffer(), 2*longBornSum->getSize());
cl.addAutoclearBuffer(longBornForce->getDeviceBuffer(), 2*longBornForce->getSize());
cl.addAutoclearBuffer(*longBornSum);
cl.addAutoclearBuffer(*longBornForce);
}
else {
bornSum = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
bornForce = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
cl.addAutoclearBuffer(bornSum->getDeviceBuffer(), bornSum->getSize());
cl.addAutoclearBuffer(bornForce->getDeviceBuffer(), bornForce->getSize());
cl.addAutoclearBuffer(*bornSum);
cl.addAutoclearBuffer(*bornForce);
}
vector<mm_float4> posq(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
int numParticles = force.getNumParticles();
......@@ -2012,12 +2059,12 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
defines["USE_CUTOFF"] = "1";
if (nb.getUsePeriodic())
defines["USE_PERIODIC"] = "1";
defines["CUTOFF_SQUARED"] = doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["PREFACTOR"] = doubleToString(prefactor);
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(cl.getNumAtomBlocks());
defines["FORCE_WORK_GROUP_SIZE"] = OpenCLExpressionUtilities::intToString(nb.getForceThreadBlockSize());
defines["CUTOFF_SQUARED"] = cl.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["PREFACTOR"] = cl.doubleToString(prefactor);
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks());
defines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(nb.getForceThreadBlockSize());
string platformVendor = cl::Platform(cl.getDevice().getInfo<CL_DEVICE_PLATFORM>()).getInfo<CL_PLATFORM_VENDOR>();
if (platformVendor == "Apple")
defines["USE_APPLE_WORKAROUND"] = "1";
......@@ -2220,7 +2267,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+intToString(forceIndex)+"_";
string prefix = "custom"+cl.intToString(forceIndex)+"_";
// Record parameters and exclusions.
......@@ -2259,11 +2306,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector<double> values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
string arrayName = prefix+"table"+intToString(i);
string arrayName = prefix+"table"+cl.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
vector<mm_float4> f = cl.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(OpenCLArray::create<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
......@@ -2356,7 +2403,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
string value = "globals["+cl.intToString(i)+"]";
variables.push_back(makeVariable(name, value));
}
map<string, Lepton::ParsedExpression> n2ValueExpressions;
......@@ -2364,7 +2411,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[0], functions).optimize();
n2ValueExpressions["tempValue1 = "] = ex;
n2ValueExpressions["tempValue2 = "] = ex.renameVariables(rename);
n2ValueSource << OpenCLExpressionUtilities::createExpressions(n2ValueExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
n2ValueSource << cl.getExpressionUtilities().createExpressions(n2ValueExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
map<string, string> replacements;
string n2ValueStr = n2ValueSource.str();
replacements["COMPUTE_VALUE"] = n2ValueStr;
......@@ -2374,7 +2421,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairValueUsesParam.resize(params->getBuffers().size(), false);
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
string paramName = "params"+cl.intToString(i+1);
if (n2ValueStr.find(paramName+"1") != n2ValueStr.npos || n2ValueStr.find(paramName+"2") != n2ValueStr.npos) {
extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << paramName << ", __local " << buffer.getType() << "* restrict local_" << paramName;
loadLocal1 << "local_" << paramName << "[localAtomIndex] = " << paramName << "1;\n";
......@@ -2399,11 +2446,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (useExclusionsForValue)
defines["USE_EXCLUSIONS"] = "1";
if (cl.getSIMDWidth() == 32)
defines["WARPS_PER_GROUP"] = OpenCLExpressionUtilities::intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()/OpenCLContext::TileSize);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(cl.getNumAtomBlocks());
defines["WARPS_PER_GROUP"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()/OpenCLContext::TileSize);
defines["CUTOFF_SQUARED"] = cl.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks());
string file;
if (deviceIsCpu)
file = OpenCLKernelSources::customGBValueN2_cpu;
......@@ -2424,12 +2471,12 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs << ", __global const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
string paramName = "params"+cl.intToString(i+1);
extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
string valueName = "values"+cl.intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* restrict global_" << valueName;
reductionSource << buffer.getType() << " local_" << valueName << ";\n";
}
......@@ -2441,22 +2488,22 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
variables[force.getGlobalParameterName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 1; i < force.getNumComputedValues(); i++) {
variables[computedValueNames[i-1]] = "local_values"+computedValues->getParameterSuffix(i-1);
map<string, Lepton::ParsedExpression> valueExpressions;
valueExpressions["local_values"+computedValues->getParameterSuffix(i)+" = "] = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
reductionSource << OpenCLExpressionUtilities::createExpressions(valueExpressions, variables, functionDefinitions, "value"+intToString(i)+"_temp", prefix+"functionParams");
reductionSource << cl.getExpressionUtilities().createExpressions(valueExpressions, variables, functionDefinitions, "value"+cl.intToString(i)+"_temp", prefix+"functionParams");
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
string valueName = "values"+intToString(i+1);
string valueName = "values"+cl.intToString(i+1);
reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_VALUES"] = reductionSource.str();
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customGBValuePerParticle, replacements), defines);
perParticleValueKernel = cl::Kernel(program, "computePerParticleValues");
}
......@@ -2478,7 +2525,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
variables.push_back(makeVariable(computedValueNames[i]+"2", "values"+computedValues->getParameterSuffix(i, "2")));
}
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables.push_back(makeVariable(force.getGlobalParameterName(i), "globals["+intToString(i)+"]"));
variables.push_back(makeVariable(force.getGlobalParameterName(i), "globals["+cl.intToString(i)+"]"));
stringstream n2EnergySource;
bool anyExclusions = (force.getNumExclusions() > 0);
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
......@@ -2494,23 +2541,23 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (useLong) {
for (int j = 0; j < force.getNumComputedValues(); j++) {
if (needChainForValue[j]) {
string index = intToString(j+1);
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+index+"_1 += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+index+"_2 += "] = energyDerivExpressions[i][2*j+1];
string index = cl.intToString(j+1);
n2EnergyExpressions["/*"+cl.intToString(i+1)+"*/ deriv"+index+"_1 += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+cl.intToString(i+1)+"*/ deriv"+index+"_2 += "] = energyDerivExpressions[i][2*j+1];
}
}
}
else {
for (int j = 0; j < force.getNumComputedValues(); j++) {
if (needChainForValue[j]) {
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_1")+" += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_2")+" += "] = energyDerivExpressions[i][2*j+1];
n2EnergyExpressions["/*"+cl.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_1")+" += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+cl.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_2")+" += "] = energyDerivExpressions[i][2*j+1];
}
}
}
if (exclude)
n2EnergySource << "if (!isExcluded) {\n";
n2EnergySource << OpenCLExpressionUtilities::createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
n2EnergySource << cl.getExpressionUtilities().createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
if (exclude)
n2EnergySource << "}\n";
}
......@@ -2523,7 +2570,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairEnergyUsesParam.resize(params->getBuffers().size(), false);
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
string paramName = "params"+cl.intToString(i+1);
if (n2EnergyStr.find(paramName+"1") != n2EnergyStr.npos || n2EnergyStr.find(paramName+"2") != n2EnergyStr.npos) {
extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << paramName << ", __local " << buffer.getType() << "* restrict local_" << paramName;
loadLocal1 << "local_" << paramName << "[localAtomIndex] = " << paramName << "1;\n";
......@@ -2536,7 +2583,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
pairEnergyUsesValue.resize(computedValues->getBuffers().size(), false);
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
string valueName = "values"+cl.intToString(i+1);
if (n2EnergyStr.find(valueName+"1") != n2EnergyStr.npos || n2EnergyStr.find(valueName+"2") != n2EnergyStr.npos) {
extraArgs << ", __global const " << buffer.getType() << "* restrict global_" << valueName << ", __local " << buffer.getType() << "* restrict local_" << valueName;
loadLocal1 << "local_" << valueName << "[localAtomIndex] = " << valueName << "1;\n";
......@@ -2549,7 +2596,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (useLong) {
extraArgs << ", __global long* restrict derivBuffers";
for (int i = 0; i < force.getNumComputedValues(); i++) {
string index = intToString(i+1);
string index = cl.intToString(i+1);
extraArgs << ", __local float* restrict local_deriv" << index;
clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n";
declare1 << "float deriv" << index << "_1 = 0.0f;\n";
......@@ -2564,7 +2611,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
else {
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
string index = cl.intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index << ", __local " << buffer.getType() << "* restrict local_deriv" << index;
clearLocal << "local_deriv" << index << "[localAtomIndex] = 0.0f;\n";
declare1 << buffer.getType() << " deriv" << index << "_1 = 0.0f;\n";
......@@ -2598,11 +2645,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (anyExclusions)
defines["USE_EXCLUSIONS"] = "1";
if (cl.getSIMDWidth() == 32)
defines["WARPS_PER_GROUP"] = OpenCLExpressionUtilities::intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()/OpenCLContext::TileSize);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(cl.getNumAtomBlocks());
defines["WARPS_PER_GROUP"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()/OpenCLContext::TileSize);
defines["CUTOFF_SQUARED"] = cl.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks());
string file;
if (deviceIsCpu)
file = OpenCLKernelSources::customGBEnergyN2_cpu;
......@@ -2621,17 +2668,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs << ", __global const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
string paramName = "params"+cl.intToString(i+1);
extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
string valueName = "values"+cl.intToString(i+1);
extraArgs << ", __global const " << buffer.getType() << "* restrict " << valueName;
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
string index = cl.intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
}
......@@ -2639,11 +2686,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs << ", __global const long* restrict derivBuffersIn";
for (int i = 0; i < energyDerivs->getNumParameters(); ++i)
reduce << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") <<
" = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << intToString(i) << "];\n";
" = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << cl.intToString(i) << "];\n";
}
else {
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
reduce << "REDUCE_VALUE(derivBuffers" << intToString(i+1) << ", " << energyDerivs->getBuffers()[i].getType() << ")\n";
reduce << "REDUCE_VALUE(derivBuffers" << cl.intToString(i+1) << ", " << energyDerivs->getBuffers()[i].getType() << ")\n";
}
// Compute the various expressions.
......@@ -2655,7 +2702,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
variables[force.getGlobalParameterName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
map<string, Lepton::ParsedExpression> expressions;
......@@ -2666,23 +2713,23 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (type != CustomGBForce::SingleParticle)
continue;
Lepton::ParsedExpression parsed = Lepton::Parser::parse(expression, functions).optimize();
expressions["/*"+intToString(i+1)+"*/ energy += "] = parsed;
expressions["/*"+cl.intToString(i+1)+"*/ energy += "] = parsed;
for (int j = 0; j < force.getNumComputedValues(); j++)
expressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j)+" += "] = energyDerivExpressions[i][j];
expressions["/*"+cl.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j)+" += "] = energyDerivExpressions[i][j];
Lepton::ParsedExpression gradx = parsed.differentiate("x").optimize();
Lepton::ParsedExpression grady = parsed.differentiate("y").optimize();
Lepton::ParsedExpression gradz = parsed.differentiate("z").optimize();
if (!isZeroExpression(gradx))
expressions["/*"+intToString(i+1)+"*/ force.x -= "] = gradx;
expressions["/*"+cl.intToString(i+1)+"*/ force.x -= "] = gradx;
if (!isZeroExpression(grady))
expressions["/*"+intToString(i+1)+"*/ force.y -= "] = grady;
expressions["/*"+cl.intToString(i+1)+"*/ force.y -= "] = grady;
if (!isZeroExpression(gradz))
expressions["/*"+intToString(i+1)+"*/ force.z -= "] = gradz;
expressions["/*"+cl.intToString(i+1)+"*/ force.z -= "] = gradz;
}
for (int i = 1; i < force.getNumComputedValues(); i++)
for (int j = 0; j < i; j++)
expressions["float dV"+intToString(i)+"dV"+intToString(j)+" = "] = valueDerivExpressions[i][j];
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functionDefinitions, "temp", prefix+"functionParams");
expressions["float dV"+cl.intToString(i)+"dV"+cl.intToString(j)+" = "] = valueDerivExpressions[i][j];
compute << cl.getExpressionUtilities().createExpressions(expressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// Record values.
......@@ -2695,7 +2742,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
compute << "deriv"<<(i+1)<<" *= totalDeriv"<<i<<";\n";
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
string index = intToString(i+1);
string index = cl.intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
}
map<string, string> replacements;
......@@ -2703,8 +2750,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
replacements["REDUCE_DERIVATIVES"] = reduce.str();
replacements["COMPUTE_ENERGY"] = compute.str();
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customGBEnergyPerParticle, replacements), defines);
perParticleEnergyKernel = cl::Kernel(program, "computePerParticleEnergy");
}
......@@ -2716,17 +2763,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
extraArgs << ", __global const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
string paramName = "params"+cl.intToString(i+1);
extraArgs << ", __global const " << buffer.getType() << "* restrict " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
string valueName = "values"+cl.intToString(i+1);
extraArgs << ", __global const " << buffer.getType() << "* restrict " << valueName;
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
string index = cl.intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* restrict derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
}
......@@ -2737,18 +2784,18 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
variables[force.getGlobalParameterName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
for (int i = 1; i < force.getNumComputedValues(); i++) {
string is = intToString(i);
string is = cl.intToString(i);
compute << "float4 dV"<<is<<"dR = (float4) 0;\n";
for (int j = 1; j < i; j++) {
if (!isZeroExpression(valueDerivExpressions[i][j])) {
map<string, Lepton::ParsedExpression> derivExpressions;
string js = intToString(j);
string js = cl.intToString(j);
derivExpressions["float dV"+is+"dV"+js+" = "] = valueDerivExpressions[i][j];
compute << OpenCLExpressionUtilities::createExpressions(derivExpressions, variables, functionDefinitions, "temp_"+is+"_"+js, prefix+"functionParams");
compute << cl.getExpressionUtilities().createExpressions(derivExpressions, variables, functionDefinitions, "temp_"+is+"_"+js, prefix+"functionParams");
compute << "dV"<<is<<"dR += dV"<<is<<"dV"<<js<<"*dV"<<js<<"dR;\n";
}
}
......@@ -2759,17 +2806,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
gradientExpressions["dV"+is+"dR.y += "] = valueGradientExpressions[i][1];
if (!isZeroExpression(valueGradientExpressions[i][2]))
gradientExpressions["dV"+is+"dR.z += "] = valueGradientExpressions[i][2];
compute << OpenCLExpressionUtilities::createExpressions(gradientExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
compute << cl.getExpressionUtilities().createExpressions(gradientExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
}
for (int i = 1; i < force.getNumComputedValues(); i++) {
string is = intToString(i);
string is = cl.intToString(i);
compute << "force -= deriv"<<energyDerivs->getParameterSuffix(i)<<"*dV"<<is<<"dR;\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_FORCES"] = compute.str();
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customGBGradientChainRule, replacements), defines);
gradientChainRuleKernel = cl::Kernel(program, "computeGradientChainRuleTerms");
}
......@@ -2779,7 +2826,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector<pair<ExpressionTreeNode, string> > globalVariables;
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
string value = "globals["+cl.intToString(i)+"]";
globalVariables.push_back(makeVariable(name, prefix+value));
}
vector<pair<ExpressionTreeNode, string> > variables = globalVariables;
......@@ -2800,7 +2847,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
Lepton::ParsedExpression dVdR = Lepton::Parser::parse(computedValueExpressions[0], functions).differentiate("r").optimize();
derivExpressions["float dV0dR1 = "] = dVdR;
derivExpressions["float dV0dR2 = "] = dVdR.renameVariables(rename);
chainSource << OpenCLExpressionUtilities::createExpressions(derivExpressions, variables, functionDefinitions, prefix+"temp0_", prefix+"functionParams");
chainSource << cl.getExpressionUtilities().createExpressions(derivExpressions, variables, functionDefinitions, prefix+"temp0_", prefix+"functionParams");
if (needChainForValue[0]) {
if (useExclusionsForValue)
chainSource << "if (!isExcluded) {\n";
......@@ -2823,20 +2870,20 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector<OpenCLNonbondedUtilities::ParameterInfo> arguments;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = prefix+"params"+intToString(i+1);
string paramName = prefix+"params"+cl.intToString(i+1);
if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos)
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string paramName = prefix+"values"+intToString(i+1);
string paramName = prefix+"values"+cl.intToString(i+1);
if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos)
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
if (needChainForValue[i]) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string paramName = prefix+"dEdV"+intToString(i+1);
string paramName = prefix+"dEdV"+cl.intToString(i+1);
parameters.push_back(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
......@@ -2852,11 +2899,11 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
cl.addForce(new OpenCLCustomGBForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
if (useLong)
cl.addAutoclearBuffer(longEnergyDerivs->getDeviceBuffer(), 2*longEnergyDerivs->getSize());
cl.addAutoclearBuffer(*longEnergyDerivs);
else {
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
cl.addAutoclearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects()/sizeof(cl_float));
cl.addAutoclearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects());
}
}
}
......@@ -2870,12 +2917,12 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
bool useLong = (cl.getSupports64BitGlobalAtomics() && !deviceIsCpu);
if (useLong) {
longValueBuffers = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers");
cl.addAutoclearBuffer(longValueBuffers->getDeviceBuffer(), 2*longValueBuffers->getSize());
cl.clearBuffer(longValueBuffers->getDeviceBuffer(), 2*longValueBuffers->getSize());
cl.addAutoclearBuffer(*longValueBuffers);
cl.clearBuffer(*longValueBuffers);
}
else {
valueBuffers = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "customGBValueBuffers");
cl.addAutoclearBuffer(valueBuffers->getDeviceBuffer(), valueBuffers->getSize());
cl.addAutoclearBuffer(*valueBuffers);
cl.clearBuffer(*valueBuffers);
}
int index = 0;
......@@ -3151,9 +3198,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z").optimize();
map<string, Lepton::ParsedExpression> expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdX = "] = forceExpressionX;
expressions["float dEdY = "] = forceExpressionY;
expressions["float dEdZ = "] = forceExpressionZ;
expressions["real dEdX = "] = forceExpressionX;
expressions["real dEdY = "] = forceExpressionY;
expressions["real dEdZ = "] = forceExpressionZ;
// Create the kernels.
......@@ -3171,7 +3218,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
variables[name] = value;
}
}
......@@ -3182,7 +3229,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
compute<<buffer.getType()<<" particleParams"<<(i+1)<<" = "<<argName<<"[index];\n";
}
vector<pair<string, string> > functions;
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
compute << cl.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map<string, string> replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::customExternalForce, replacements), force.getForceGroup());
......@@ -3455,11 +3502,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
vector<double> values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
string arrayName = "table"+intToString(i);
string arrayName = "table"+cl.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
vector<mm_float4> f = cl.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(OpenCLArray::create<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
tableArgs << ", __global const float4* restrict " << arrayName;
......@@ -3491,7 +3538,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
variables[name] = "globals["+intToString(i)+"]";
variables[name] = "globals["+cl.intToString(i)+"]";
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
......@@ -3512,12 +3559,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
const vector<int>& atoms = iter->second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
if (computedDeltas.count(deltaName) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
computedDeltas.insert(deltaName);
}
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float r_"+deltaName+" = sqrt(delta"+deltaName+".w);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real r_"+deltaName+" = SQRT(delta"+deltaName+".w);\n");
variables[iter->first] = "r_"+deltaName;
forceExpressions["float dEdDistance"+intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
forceExpressions["real dEdDistance"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
......@@ -3526,16 +3573,16 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
if (computedDeltas.count(deltaName1) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[0]]+");\n");
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[1]]+", "+atomNamesLower[atoms[2]]+");\n");
computedDeltas.insert(deltaName2);
}
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real "+angleName+" = computeAngle(delta"+deltaName1+", delta"+deltaName2+");\n");
variables[iter->first] = angleName;
forceExpressions["float dEdAngle"+intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
forceExpressions["real dEdAngle"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
......@@ -3547,23 +3594,23 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
if (computedDeltas.count(deltaName1) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName1+" = delta("+atomNamesLower[atoms[0]]+", "+atomNamesLower[atoms[1]]+");\n");
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName2+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[1]]+");\n");
computedDeltas.insert(deltaName2);
}
if (computedDeltas.count(deltaName3) == 0) {
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 delta"+deltaName3+" = delta("+atomNamesLower[atoms[2]]+", "+atomNamesLower[atoms[3]]+");\n");
computedDeltas.insert(deltaName3);
}
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 "+crossName2+" = computeCross(delta"+deltaName2+", delta"+deltaName3+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float "+dihedralName+" = computeAngle("+crossName1+", "+crossName2+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 "+crossName1+" = computeCross(delta"+deltaName1+", delta"+deltaName2+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 "+crossName2+" = computeCross(delta"+deltaName2+", delta"+deltaName3+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real "+dihedralName+" = computeAngle("+crossName1+", "+crossName2+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, dihedralName+" *= (delta"+deltaName1+".x*"+crossName2+".x + delta"+deltaName1+".y*"+crossName2+".y + delta"+deltaName1+".z*"+crossName2+".z < 0 ? -1 : 1);\n");
variables[iter->first] = dihedralName;
forceExpressions["float dEdDihedral"+intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
forceExpressions["real dEdDihedral"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
// Next it needs to load parameters from global memory.
......@@ -3573,19 +3620,19 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
extraArgs << ", __global const "+buffer.getType()+"* restrict donor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+intToString(i+1)+" = donor"+buffer.getName()+"[index];\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" donorParams"+cl.intToString(i+1)+" = donor"+buffer.getName()+"[index];\n");
}
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
extraArgs << ", __global const "+buffer.getType()+"* restrict acceptor"+buffer.getName();
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, buffer.getType()+" acceptorParams"+cl.intToString(i+1)+" = acceptor"+buffer.getName()+"[index];\n");
}
// Now evaluate the expressions.
computeAcceptor << OpenCLExpressionUtilities::createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
computeAcceptor << cl.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
forceExpressions["energy += "] = energyExpression;
computeDonor << OpenCLExpressionUtilities::createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
computeDonor << cl.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", "functionParams");
// Finally, apply forces to atoms.
......@@ -3593,7 +3640,7 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& atoms = iter->second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
string value = "(dEdDistance"+intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz";
string value = "(dEdDistance"+cl.intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz";
applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "-"+value);
applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], value);
}
......@@ -3603,11 +3650,11 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]];
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 crossProd = cross(delta"+deltaName2+", delta"+deltaName1+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float lengthCross = max(length(crossProd), 1e-6f);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross0 = -cross(delta"+deltaName1+", crossProd)*dEdAngle"+intToString(index)+"/(delta"+deltaName1+".w*lengthCross);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross2 = cross(delta"+deltaName2+", crossProd)*dEdAngle"+intToString(index)+"/(delta"+deltaName2+".w*lengthCross);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 deltaCross1 = -(deltaCross0+deltaCross2);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 crossProd = cross(delta"+deltaName2+", delta"+deltaName1+");\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real lengthCross = max(length(crossProd), (real) 1e-6f);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 deltaCross0 = -cross(delta"+deltaName1+", crossProd)*dEdAngle"+cl.intToString(index)+"/(delta"+deltaName1+".w*lengthCross);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 deltaCross2 = cross(delta"+deltaName2+", crossProd)*dEdAngle"+cl.intToString(index)+"/(delta"+deltaName2+".w*lengthCross);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 deltaCross1 = -(deltaCross0+deltaCross2);\n");
applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "deltaCross0.xyz");
applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "deltaCross1.xyz");
applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "deltaCross2.xyz");
......@@ -3622,15 +3669,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "{\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float r = sqrt(delta"+deltaName2+".w);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 ff;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.x = (-dEdDihedral"+intToString(index)+"*r)/"+crossName1+".w;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real r = SQRT(delta"+deltaName2+".w);\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 ff;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.x = (-dEdDihedral"+cl.intToString(index)+"*r)/"+crossName1+".w;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.y = (delta"+deltaName1+".x*delta"+deltaName2+".x + delta"+deltaName1+".y*delta"+deltaName2+".y + delta"+deltaName1+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.z = (delta"+deltaName3+".x*delta"+deltaName2+".x + delta"+deltaName3+".y*delta"+deltaName2+".y + delta"+deltaName3+".z*delta"+deltaName2+".z)/delta"+deltaName2+".w;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.w = (dEdDihedral"+intToString(index)+"*r)/"+crossName2+".w;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 internalF0 = ff.x*"+crossName1+";\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 internalF3 = ff.w*"+crossName2+";\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "float4 s = ff.y*internalF0 - ff.z*internalF3;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "ff.w = (dEdDihedral"+cl.intToString(index)+"*r)/"+crossName2+".w;\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 internalF0 = ff.x*"+crossName1+";\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 internalF3 = ff.w*"+crossName2+";\n");
addDonorAndAcceptorCode(computeDonor, computeAcceptor, "real4 s = ff.y*internalF0 - ff.z*internalF3;\n");
applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[0], "internalF0.xyz");
applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[1], "s.xyz-internalF0.xyz");
applyDonorAndAcceptorForces(computeDonor, computeAcceptor, atoms[2], "-s.xyz-internalF3.xyz");
......@@ -3645,13 +3692,13 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
replacements["COMPUTE_ACCEPTOR_FORCE"] = computeAcceptor.str();
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
map<string, string> defines;
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_DONORS"] = intToString(numDonors);
defines["NUM_ACCEPTORS"] = intToString(numAcceptors);
defines["PI"] = doubleToString(M_PI);
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
defines["NUM_DONORS"] = cl.intToString(numDonors);
defines["NUM_ACCEPTORS"] = cl.intToString(numAcceptors);
defines["PI"] = cl.doubleToString(M_PI);
if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff) {
defines["USE_CUTOFF"] = "1";
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["CUTOFF_SQUARED"] = cl.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
}
if (force.getNonbondedMethod() != CustomHbondForce::NoCutoff && force.getNonbondedMethod() != CustomHbondForce::CutoffNonPeriodic)
defines["USE_PERIODIC"] = "1";
......@@ -3729,11 +3776,11 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
acceptorKernel.setArg<cl::Buffer>(index++, tabulatedFunctionParams->getDeviceBuffer());
}
}
donorKernel.setArg<mm_float4>(8, cl.getPeriodicBoxSize());
donorKernel.setArg<mm_float4>(9, cl.getInvPeriodicBoxSize());
setPeriodicBoxSizeArg(cl, donorKernel, 8);
setInvPeriodicBoxSizeArg(cl, donorKernel, 9);
cl.executeKernel(donorKernel, max(numDonors, numAcceptors));
acceptorKernel.setArg<mm_float4>(8, cl.getPeriodicBoxSize());
acceptorKernel.setArg<mm_float4>(9, cl.getInvPeriodicBoxSize());
setPeriodicBoxSizeArg(cl, acceptorKernel, 8);
setInvPeriodicBoxSizeArg(cl, acceptorKernel, 9);
cl.executeKernel(acceptorKernel, max(numDonors, numAcceptors));
return 0.0;
}
......@@ -3848,7 +3895,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
force.getFunctionParameters(i, name, values, min, max);
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
vector<mm_float4> f = cl.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
OpenCLArray* array = OpenCLArray::create<mm_float4>(cl, values.size()-1, "TabulatedFunction");
tabulatedFunctions.push_back(array);
array->upload(f);
......@@ -3872,7 +3919,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
}
map<string, string> variables;
for (int i = 0; i < particlesPerBond; i++) {
string index = intToString(i+1);
string index = cl.intToString(i+1);
variables["x"+index] = "pos"+index+".x";
variables["y"+index] = "pos"+index+".y";
variables["z"+index] = "pos"+index+".z";
......@@ -3887,7 +3934,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+intToString(i)+"]";
string value = argName+"["+cl.intToString(i)+"]";
variables[name] = value;
}
}
......@@ -3903,7 +3950,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
set<string> computedDeltas;
vector<string> atomNames, posNames;
for (int i = 0; i < particlesPerBond; i++) {
string index = intToString(i+1);
string index = cl.intToString(i+1);
atomNames.push_back("P"+index);
posNames.push_back("pos"+index);
}
......@@ -3913,12 +3960,12 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
const vector<int>& atoms = iter->second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
if (computedDeltas.count(deltaName) == 0) {
compute<<"float4 delta"<<deltaName<<" = ccb_delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<");\n";
compute<<"real4 delta"<<deltaName<<" = ccb_delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<");\n";
computedDeltas.insert(deltaName);
}
compute<<"float r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
variables[iter->first] = "r_"+deltaName;
forceExpressions["float dEdDistance"+intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
forceExpressions["real dEdDistance"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
......@@ -3927,16 +3974,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"float4 delta"<<deltaName1<<" = ccb_delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<");\n";
compute<<"real4 delta"<<deltaName1<<" = ccb_delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"float4 delta"<<deltaName2<<" = ccb_delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<");\n";
compute<<"real4 delta"<<deltaName2<<" = ccb_delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<");\n";
computedDeltas.insert(deltaName2);
}
compute<<"float "<<angleName<<" = ccb_computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
compute<<"real "<<angleName<<" = ccb_computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
variables[iter->first] = angleName;
forceExpressions["float dEdAngle"+intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
forceExpressions["real dEdAngle"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
......@@ -3948,23 +3995,23 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"float4 delta"<<deltaName1<<" = ccb_delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<");\n";
compute<<"real4 delta"<<deltaName1<<" = ccb_delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"float4 delta"<<deltaName2<<" = ccb_delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<");\n";
compute<<"real4 delta"<<deltaName2<<" = ccb_delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<");\n";
computedDeltas.insert(deltaName2);
}
if (computedDeltas.count(deltaName3) == 0) {
compute<<"float4 delta"<<deltaName3<<" = ccb_delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<");\n";
compute<<"real4 delta"<<deltaName3<<" = ccb_delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<");\n";
computedDeltas.insert(deltaName3);
}
compute<<"float4 "<<crossName1<<" = ccb_computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
compute<<"float4 "<<crossName2<<" = ccb_computeCross(delta"<<deltaName2<<", delta"<<deltaName3<<");\n";
compute<<"float "<<dihedralName<<" = ccb_computeAngle("<<crossName1<<", "<<crossName2<<");\n";
compute<<"real4 "<<crossName1<<" = ccb_computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
compute<<"real4 "<<crossName2<<" = ccb_computeCross(delta"<<deltaName2<<", delta"<<deltaName3<<");\n";
compute<<"real "<<dihedralName<<" = ccb_computeAngle("<<crossName1<<", "<<crossName2<<");\n";
compute<<dihedralName<<" *= (delta"<<deltaName1<<".x*"<<crossName2<<".x + delta"<<deltaName1<<".y*"<<crossName2<<".y + delta"<<deltaName1<<".z*"<<crossName2<<".z < 0 ? -1 : 1);\n";
variables[iter->first] = dihedralName;
forceExpressions["float dEdDihedral"+intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
forceExpressions["real dEdDihedral"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
// Now evaluate the expressions.
......@@ -3975,16 +4022,16 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
compute<<buffer.getType()<<" bondParams"<<(i+1)<<" = "<<argName<<"[index];\n";
}
forceExpressions["energy += "] = energyExpression;
compute << OpenCLExpressionUtilities::createExpressions(forceExpressions, variables, functionDefinitions, "temp", functionParamsName);
compute << cl.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, "temp", functionParamsName);
// Finally, apply forces to atoms.
vector<string> forceNames;
for (int i = 0; i < particlesPerBond; i++) {
string istr = intToString(i+1);
string istr = cl.intToString(i+1);
string forceName = "force"+istr;
forceNames.push_back(forceName);
compute<<"float4 "<<forceName<<" = (float4) (0.0f, 0.0f, 0.0f, 0.0f);\n";
compute<<"real4 "<<forceName<<" = (real4) 0;\n";
compute<<"{\n";
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x"+istr).optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y"+istr).optimize();
......@@ -3997,14 +4044,14 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
if (!isZeroExpression(forceExpressionZ))
expressions[forceName+".z -= "] = forceExpressionZ;
if (expressions.size() > 0)
compute<<OpenCLExpressionUtilities::createExpressions(expressions, variables, functionDefinitions, "coordtemp", functionParamsName);
compute<<cl.getExpressionUtilities().createExpressions(expressions, variables, functionDefinitions, "coordtemp", functionParamsName);
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& atoms = iter->second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
string value = "(dEdDistance"+intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz";
string value = "(dEdDistance"+cl.intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz";
compute<<forceNames[atoms[0]]<<".xyz += "<<"-"<<value<<";\n";
compute<<forceNames[atoms[1]]<<".xyz += "<<value<<";\n";
}
......@@ -4014,11 +4061,11 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string deltaName1 = atomNames[atoms[1]]+atomNames[atoms[0]];
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
compute<<"{\n";
compute<<"float4 crossProd = cross(delta"<<deltaName2<<", delta"<<deltaName1<<");\n";
compute<<"float lengthCross = max(length(crossProd), 1e-6f);\n";
compute<<"float4 deltaCross0 = -cross(delta"<<deltaName1<<", crossProd)*dEdAngle"<<intToString(index)<<"/(delta"<<deltaName1<<".w*lengthCross);\n";
compute<<"float4 deltaCross2 = cross(delta"<<deltaName2<<", crossProd)*dEdAngle"<<intToString(index)<<"/(delta"<<deltaName2<<".w*lengthCross);\n";
compute<<"float4 deltaCross1 = -(deltaCross0+deltaCross2);\n";
compute<<"real4 crossProd = cross(delta"<<deltaName2<<", delta"<<deltaName1<<");\n";
compute<<"real lengthCross = max(length(crossProd), (real) 1e-6f);\n";
compute<<"real4 deltaCross0 = -cross(delta"<<deltaName1<<", crossProd)*dEdAngle"<<cl.intToString(index)<<"/(delta"<<deltaName1<<".w*lengthCross);\n";
compute<<"real4 deltaCross2 = cross(delta"<<deltaName2<<", crossProd)*dEdAngle"<<cl.intToString(index)<<"/(delta"<<deltaName2<<".w*lengthCross);\n";
compute<<"real4 deltaCross1 = -(deltaCross0+deltaCross2);\n";
compute<<forceNames[atoms[0]]<<".xyz += deltaCross0.xyz;\n";
compute<<forceNames[atoms[1]]<<".xyz += deltaCross1.xyz;\n";
compute<<forceNames[atoms[2]]<<".xyz += deltaCross2.xyz;\n";
......@@ -4033,15 +4080,15 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
compute<<"{\n";
compute<<"float r = sqrt(delta"<<deltaName2<<".w);\n";
compute<<"float4 ff;\n";
compute<<"ff.x = (-dEdDihedral"<<intToString(index)<<"*r)/"<<crossName1<<".w;\n";
compute<<"real r = SQRT(delta"<<deltaName2<<".w);\n";
compute<<"real4 ff;\n";
compute<<"ff.x = (-dEdDihedral"<<cl.intToString(index)<<"*r)/"<<crossName1<<".w;\n";
compute<<"ff.y = (delta"<<deltaName1<<".x*delta"<<deltaName2<<".x + delta"<<deltaName1<<".y*delta"<<deltaName2<<".y + delta"<<deltaName1<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.z = (delta"<<deltaName3<<".x*delta"<<deltaName2<<".x + delta"<<deltaName3<<".y*delta"<<deltaName2<<".y + delta"<<deltaName3<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.w = (dEdDihedral"<<intToString(index)<<"*r)/"<<crossName2<<".w;\n";
compute<<"float4 internalF0 = ff.x*"<<crossName1<<";\n";
compute<<"float4 internalF3 = ff.w*"<<crossName2<<";\n";
compute<<"float4 s = ff.y*internalF0 - ff.z*internalF3;\n";
compute<<"ff.w = (dEdDihedral"<<cl.intToString(index)<<"*r)/"<<crossName2<<".w;\n";
compute<<"real4 internalF0 = ff.x*"<<crossName1<<";\n";
compute<<"real4 internalF3 = ff.w*"<<crossName2<<";\n";
compute<<"real4 s = ff.y*internalF0 - ff.z*internalF3;\n";
compute<<forceNames[atoms[0]]<<".xyz += internalF0.xyz;\n";
compute<<forceNames[atoms[1]]<<".xyz += s.xyz-internalF0.xyz;\n";
compute<<forceNames[atoms[2]]<<".xyz += -s.xyz-internalF3.xyz;\n";
......@@ -4050,7 +4097,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
}
cl.getBondedUtilities().addInteraction(atoms, compute.str(), force.getForceGroup());
map<string, string> replacements;
replacements["M_PI"] = doubleToString(M_PI);
replacements["M_PI"] = cl.doubleToString(M_PI);
cl.getBondedUtilities().addPrefixCode(cl.replaceStrings(OpenCLKernelSources::customCompoundBond, replacements));;
}
......@@ -4173,8 +4220,8 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines, "");
kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2");
......@@ -4266,7 +4313,7 @@ void OpenCLIntegrateBrownianStepKernel::initialize(const System& system, const B
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(OpenCLKernelSources::brownian, defines, "");
kernel1 = cl::Kernel(program, "integrateBrownianPart1");
kernel2 = cl::Kernel(program, "integrateBrownianPart2");
......@@ -4437,8 +4484,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines, "");
kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2");
......@@ -4635,10 +4682,10 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
else {
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
if (variable == integrator.getGlobalVariableName(i))
expressions["globals["+intToString(i)+"] = "] = expr;
expressions["globals["+cl.intToString(i)+"] = "] = expr;
for (int i = 0; i < (int) parameterNames.size(); i++)
if (variable == parameterNames[i]) {
expressions["params["+intToString(i)+"] = "] = expr;
expressions["params["+cl.intToString(i)+"] = "] = expr;
modifiesParameters = true;
}
}
......@@ -4650,11 +4697,11 @@ string OpenCLIntegrateCustomStepKernel::createGlobalComputation(const string& va
variables["gaussian"] = "gaussian";
variables[energyName] = "energy[0]";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+intToString(i)+"]";
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "params["+intToString(i)+"]";
variables[parameterNames[i]] = "params["+cl.intToString(i)+"]";
vector<pair<string, string> > functions;
return OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
return cl.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
}
string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& variable, const Lepton::ParsedExpression& expr, int component, CustomIntegrator& integrator, const string& forceName, const string& energyName) {
......@@ -4666,7 +4713,7 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
else if (variable == "v")
expressions["velocity"+suffix+" = "] = expr;
else if (variable == "")
expressions["sum[3*index+"+intToString(component)+"] = "] = expr;
expressions["sum[3*index+"+cl.intToString(component)+"] = "] = expr;
else {
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
if (variable == integrator.getPerDofVariableName(i))
......@@ -4684,14 +4731,14 @@ string OpenCLIntegrateCustomStepKernel::createPerDofComputation(const string& va
variables["dt"] = "stepSize";
variables[energyName] = "energy[0]";
for (int i = 0; i < integrator.getNumGlobalVariables(); i++)
variables[integrator.getGlobalVariableName(i)] = "globals["+intToString(i)+"]";
variables[integrator.getGlobalVariableName(i)] = "globals["+cl.intToString(i)+"]";
for (int i = 0; i < integrator.getNumPerDofVariables(); i++)
variables[integrator.getPerDofVariableName(i)] = "perDof"+suffix.substr(1)+perDofValues->getParameterSuffix(i);
for (int i = 0; i < (int) parameterNames.size(); i++)
variables[parameterNames[i]] = "params["+intToString(i)+"]";
variables[parameterNames[i]] = "params["+cl.intToString(i)+"]";
vector<pair<string, string> > functions;
string tempType = (cl.getSupportsDoublePrecision() ? "double" : "float");
return OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp"+intToString(component)+"_", "", tempType);
return cl.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp"+cl.intToString(component)+"_", "", tempType);
}
void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrator& integrator, bool& forcesAreValid) {
......@@ -4733,8 +4780,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
merged.resize(numSteps, false);
modifiesParameters = false;
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["WORK_GROUP_SIZE"] = intToString(OpenCLContext::ThreadBlockSize);
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["WORK_GROUP_SIZE"] = cl.intToString(OpenCLContext::ThreadBlockSize);
// Initialize the random number generator.
......@@ -4858,9 +4905,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
stringstream compute;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
compute << buffer.getType()<<" perDofx"<<intToString(i+1)<<" = perDofValues"<<intToString(i+1)<<"[3*index];\n";
compute << buffer.getType()<<" perDofy"<<intToString(i+1)<<" = perDofValues"<<intToString(i+1)<<"[3*index+1];\n";
compute << buffer.getType()<<" perDofz"<<intToString(i+1)<<" = perDofValues"<<intToString(i+1)<<"[3*index+2];\n";
compute << buffer.getType()<<" perDofx"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index];\n";
compute << buffer.getType()<<" perDofy"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index+1];\n";
compute << buffer.getType()<<" perDofz"<<cl.intToString(i+1)<<" = perDofValues"<<cl.intToString(i+1)<<"[3*index+2];\n";
}
int numGaussian = 0, numUniform = 0;
for (int j = step; j < numSteps && (j == step || merged[j]); j++) {
......@@ -4882,9 +4929,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
else {
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
compute << "perDofValues"<<intToString(i+1)<<"[3*index] = perDofx"<<intToString(i+1)<<";\n";
compute << "perDofValues"<<intToString(i+1)<<"[3*index+1] = perDofy"<<intToString(i+1)<<";\n";
compute << "perDofValues"<<intToString(i+1)<<"[3*index+2] = perDofz"<<intToString(i+1)<<";\n";
compute << "perDofValues"<<cl.intToString(i+1)<<"[3*index] = perDofx"<<cl.intToString(i+1)<<";\n";
compute << "perDofValues"<<cl.intToString(i+1)<<"[3*index+1] = perDofy"<<cl.intToString(i+1)<<";\n";
compute << "perDofValues"<<cl.intToString(i+1)<<"[3*index+2] = perDofz"<<cl.intToString(i+1)<<";\n";
}
}
compute << "}\n";
......@@ -4896,7 +4943,7 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
stringstream args;
for (int i = 0; i < (int) perDofValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = perDofValues->getBuffers()[i];
string valueName = "perDofValues"+intToString(i+1);
string valueName = "perDofValues"+cl.intToString(i+1);
args << ", __global " << buffer.getType() << "* restrict " << valueName;
}
replacements["PARAMETER_ARGUMENTS"] = args.str();
......@@ -5216,7 +5263,7 @@ OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
randomSeed = thermostat.getRandomNumberSeed();
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(OpenCLKernelSources::andersenThermostat, defines);
kernel = cl::Kernel(program, "applyAndersenThermostat");
cl.getIntegrationUtilities().initRandomNumberGenerator(randomSeed);
......@@ -5349,7 +5396,7 @@ void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotion
for (int i = 0; i < numAtoms; i++)
totalMass += system.getParticleMass(i);
map<string, string> defines;
defines["INVERSE_TOTAL_MASS"] = doubleToString(1.0/totalMass);
defines["INVERSE_TOTAL_MASS"] = cl.doubleToString(1.0/totalMass);
cl::Program program = cl.createProgram(OpenCLKernelSources::removeCM, defines);
kernel1 = cl::Kernel(program, "calcCenterOfMassMomentum");
kernel1.setArg<cl_int>(0, numAtoms);
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2011 Stanford University and the Authors. *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -267,7 +267,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
forceKernel = createInteractionKernel(kernelSource, parameters, arguments, true, true);
if (useCutoff) {
map<string, string> defines;
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (usePeriodic)
......@@ -281,6 +281,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findBlockBoundsKernel.setArg<cl::Buffer>(5, blockBoundingBox->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(6, interactionCount->getDeviceBuffer());
findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
if (context.getUseDoublePrecision())
findInteractingBlocksKernel.setArg<cl_double>(0, cutoff*cutoff);
else
findInteractingBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff));
findInteractingBlocksKernel.setArg<cl::Buffer>(3, blockCenter->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(4, blockBoundingBox->getDeviceBuffer());
......@@ -293,6 +296,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel.setArg<cl_uint>(11, startTileIndex+numTiles);
if (context.getSIMDWidth() == 32 && !deviceIsCpu) {
findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks");
if (context.getUseDoublePrecision())
findInteractionsWithinBlocksKernel.setArg<cl_double>(0, cutoff*cutoff);
else
findInteractionsWithinBlocksKernel.setArg<cl_float>(0, (cl_float) (cutoff*cutoff));
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(4, interactingTiles->getDeviceBuffer());
......@@ -315,6 +321,20 @@ int OpenCLNonbondedUtilities::findExclusionIndex(int x, int y, const vector<cl_u
throw OpenMMException("Internal error: exclusion in unexpected tile");
}
/**
 * Set one kernel argument to the periodic box size, choosing the vector type
 * (mm_double4 or mm_float4) according to the context's precision setting.
 *
 * @param cl      the context queried for the precision mode and the box size
 * @param kernel  the kernel whose argument is being set
 * @param index   the position of the argument within the kernel's argument list
 */
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    // Single precision: pass the float4 form of the box size.
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getPeriodicBoxSize());
        return;
    }
    // Double precision: the kernel argument must be the double4 form.
    kernel.setArg<mm_double4>(index, cl.getPeriodicBoxSizeDouble());
}
/**
 * Set one kernel argument to the inverse periodic box size, choosing the vector
 * type (mm_double4 or mm_float4) according to the context's precision setting.
 *
 * @param cl      the context queried for the precision mode and the inverse box size
 * @param kernel  the kernel whose argument is being set
 * @param index   the position of the argument within the kernel's argument list
 */
static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
    // Single precision: pass the float4 form of the inverse box size.
    if (!cl.getUseDoublePrecision()) {
        kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
        return;
    }
    // Double precision: the kernel argument must be the double4 form.
    kernel.setArg<mm_double4>(index, cl.getInvPeriodicBoxSizeDouble());
}
void OpenCLNonbondedUtilities::prepareInteractions() {
if (!useCutoff)
return;
......@@ -327,15 +347,15 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list.
findBlockBoundsKernel.setArg<mm_float4>(1, context.getPeriodicBoxSize());
findBlockBoundsKernel.setArg<mm_float4>(2, context.getInvPeriodicBoxSize());
setPeriodicBoxSizeArg(context, findBlockBoundsKernel, 1);
setInvPeriodicBoxSizeArg(context, findBlockBoundsKernel, 2);
context.executeKernel(findBlockBoundsKernel, context.getNumAtoms());
findInteractingBlocksKernel.setArg<mm_float4>(1, context.getPeriodicBoxSize());
findInteractingBlocksKernel.setArg<mm_float4>(2, context.getInvPeriodicBoxSize());
setPeriodicBoxSizeArg(context, findInteractingBlocksKernel, 1);
setInvPeriodicBoxSizeArg(context, findInteractingBlocksKernel, 2);
context.executeKernel(findInteractingBlocksKernel, context.getNumAtoms(), deviceIsCpu ? 1 : -1);
if (context.getSIMDWidth() == 32 && !deviceIsCpu) {
findInteractionsWithinBlocksKernel.setArg<mm_float4>(1, context.getPeriodicBoxSize());
findInteractionsWithinBlocksKernel.setArg<mm_float4>(2, context.getInvPeriodicBoxSize());
setPeriodicBoxSizeArg(context, findInteractionsWithinBlocksKernel, 1);
setInvPeriodicBoxSizeArg(context, findInteractionsWithinBlocksKernel, 2);
context.executeKernel(findInteractionsWithinBlocksKernel, context.getNumAtoms(), 128);
}
}
......@@ -343,8 +363,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
void OpenCLNonbondedUtilities::computeInteractions() {
if (cutoff != -1.0) {
if (useCutoff) {
forceKernel.setArg<mm_float4>(10, context.getPeriodicBoxSize());
forceKernel.setArg<mm_float4>(11, context.getInvPeriodicBoxSize());
setPeriodicBoxSizeArg(context, forceKernel, 10);
setInvPeriodicBoxSizeArg(context, forceKernel, 11);
}
context.executeKernel(forceKernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
......@@ -498,11 +518,11 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["USE_EXCLUSIONS"] = "1";
if (isSymmetric)
defines["USE_SYMMETRIC"] = "1";
defines["FORCE_WORK_GROUP_SIZE"] = OpenCLExpressionUtilities::intToString(forceThreadBlockSize);
defines["CUTOFF_SQUARED"] = OpenCLExpressionUtilities::doubleToString(cutoff*cutoff);
defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = OpenCLExpressionUtilities::intToString(context.getNumAtomBlocks());
defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize);
defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff);
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
if ((localDataSize/4)%2 == 0)
defines["PARAMETER_SIZE_IS_EVEN"] = "1";
string file;
......
......@@ -30,6 +30,7 @@
#include "OpenCLContext.h"
#include "openmm/System.h"
#include "OpenCLExpressionUtilities.h"
#include <sstream>
#include <string>
#include <vector>
......@@ -287,8 +288,11 @@ public:
name(name), componentType(componentType), numComponents(numComponents), size(size), memory(&memory) {
if (numComponents == 1)
type = componentType;
else
type = componentType+OpenCLExpressionUtilities::intToString(numComponents);
else {
std::stringstream s;
s << componentType << numComponents;
type = s.str();
}
}
const std::string& getName() const {
return name;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011 Stanford University and the Authors. *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -54,14 +54,14 @@ using namespace std;
class OpenCLParallelCalcForcesAndEnergyKernel::BeginComputationTask : public OpenCLContext::WorkTask {
public:
BeginComputationTask(ContextImpl& context, OpenCLContext& cl, OpenCLCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, mm_float4* pinnedMemory) : context(context), cl(cl), kernel(kernel),
bool includeForce, bool includeEnergy, int groups, void* pinnedMemory) : context(context), cl(cl), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), pinnedMemory(pinnedMemory) {
}
void execute() {
// Copy coordinates over to this device and execute the kernel.
if (cl.getContextIndex() > 0)
cl.getQueue().enqueueWriteBuffer(cl.getPosq().getDeviceBuffer(), CL_FALSE, 0, cl.getPaddedNumAtoms()*sizeof(mm_float4), pinnedMemory);
cl.getQueue().enqueueWriteBuffer(cl.getPosq().getDeviceBuffer(), CL_FALSE, 0, cl.getPaddedNumAtoms()*cl.getPosq().getElementSize(), pinnedMemory);
kernel.beginComputation(context, includeForce, includeEnergy, groups);
}
private:
......@@ -70,13 +70,13 @@ private:
OpenCLCalcForcesAndEnergyKernel& kernel;
bool includeForce, includeEnergy;
int groups;
mm_float4* pinnedMemory;
void* pinnedMemory;
};
class OpenCLParallelCalcForcesAndEnergyKernel::FinishComputationTask : public OpenCLContext::WorkTask {
public:
FinishComputationTask(ContextImpl& context, OpenCLContext& cl, OpenCLCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, mm_float4* pinnedMemory) :
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, void* pinnedMemory) :
context(context), cl(cl), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory) {
}
......@@ -87,8 +87,9 @@ public:
if (includeForce) {
if (cl.getContextIndex() > 0) {
int numAtoms = cl.getPaddedNumAtoms();
void* dest = (cl.getUseDoublePrecision() ? (void*) &((mm_double4*) pinnedMemory)[(cl.getContextIndex()-1)*numAtoms] : (void*) &((mm_float4*) pinnedMemory)[(cl.getContextIndex()-1)*numAtoms]);
cl.getQueue().enqueueReadBuffer(cl.getForce().getDeviceBuffer(), CL_TRUE, 0,
numAtoms*sizeof(mm_float4), &pinnedMemory[(cl.getContextIndex()-1)*numAtoms]);
numAtoms*cl.getForce().getElementSize(), dest);
}
else
cl.getQueue().finish();
......@@ -103,7 +104,7 @@ private:
int groups;
double& energy;
long long& completionTime;
mm_float4* pinnedMemory;
void* pinnedMemory;
};
OpenCLParallelCalcForcesAndEnergyKernel::OpenCLParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, OpenCLPlatform::PlatformData& data) :
......@@ -129,19 +130,20 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
OpenCLContext& cl0 = *data.contexts[0];
int elementSize = (cl0.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
if (contextForces == NULL) {
contextForces = OpenCLArray::create<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces");
int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*sizeof(mm_float4);
int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*elementSize;
pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedPositionMemory = (mm_float4*) cl0.getQueue().enqueueMapBuffer(*pinnedPositionBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
pinnedPositionMemory = cl0.getQueue().enqueueMapBuffer(*pinnedPositionBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
pinnedForceBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedForceMemory = (mm_float4*) cl0.getQueue().enqueueMapBuffer(*pinnedForceBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
pinnedForceMemory = cl0.getQueue().enqueueMapBuffer(*pinnedForceBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
}
// Copy coordinates over to each device and execute the kernel.
cl0.getQueue().enqueueReadBuffer(cl0.getPosq().getDeviceBuffer(), CL_TRUE, 0, cl0.getPaddedNumAtoms()*sizeof(mm_float4), pinnedPositionMemory);
cl0.getQueue().enqueueReadBuffer(cl0.getPosq().getDeviceBuffer(), CL_TRUE, 0, cl0.getPaddedNumAtoms()*elementSize, pinnedPositionMemory);
for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0;
OpenCLContext& cl = *data.contexts[i];
......@@ -165,8 +167,9 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
OpenCLContext& cl = *data.contexts[0];
int numAtoms = cl.getPaddedNumAtoms();
cl.getQueue().enqueueWriteBuffer(contextForces->getDeviceBuffer(), CL_FALSE, numAtoms*sizeof(mm_float4),
numAtoms*(data.contexts.size()-1)*sizeof(mm_float4), pinnedForceMemory);
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueWriteBuffer(contextForces->getDeviceBuffer(), CL_FALSE, numAtoms*elementSize,
numAtoms*(data.contexts.size()-1)*elementSize, pinnedForceMemory);
cl.reduceBuffer(*contextForces, data.contexts.size());
// Balance work between the contexts by transferring a few nonbonded tiles from the context that
......
......@@ -84,8 +84,8 @@ private:
OpenCLArray* contextForces;
cl::Buffer* pinnedPositionBuffer;
cl::Buffer* pinnedForceBuffer;
mm_float4* pinnedPositionMemory;
mm_float4* pinnedForceMemory;
void* pinnedPositionMemory;
void* pinnedForceMemory;
};
/**
......
......@@ -141,7 +141,7 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
device << contexts[i]->getDeviceIndex();
}
propertyValues[OpenCLPlatform::OpenCLDeviceIndex()] = device.str();
propertyValues[OpenCLPlatform::OpenCLPlatformIndex()] = OpenCLExpressionUtilities::intToString(platformIndex);
propertyValues[OpenCLPlatform::OpenCLPlatformIndex()] = contexts[0]->intToString(platformIndex);
propertyValues[OpenCLPlatform::OpenCLPrecision()] = precisionProperty;
contextEnergy.resize(contexts.size());
}
......
......@@ -162,7 +162,7 @@ public:
// Assign array elements to buckets.
unsigned int numBuckets = bucketOffset->getSize();
context.clearBuffer(bucketOffset->getDeviceBuffer(), numBuckets);
context.clearBuffer(*bucketOffset);
assignElementsKernel.setArg<cl::Buffer>(0, data.getDeviceBuffer());
assignElementsKernel.setArg<cl_int>(1, data.getSize());
assignElementsKernel.setArg<cl_int>(2, numBuckets);
......
float4 v0 = pos2-pos1;
float4 v1 = pos2-pos3;
float4 cp = cross(v0, v1);
float rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(SQRT(rp), 1.0e-06f);
float r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
float r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
float dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
float cosine = clamp(dot*RSQRT(r21*r23), -1.0f, 1.0f);
float theta = acos(cosine);
real4 v0 = pos2-pos1;
real4 v1 = pos2-pos3;
real4 cp = cross(v0, v1);
real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(SQRT(rp), (real) 1.0e-06f);
real r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
real r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
real dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
real cosine = clamp(dot*RSQRT(r21*r23), (real) -1, (real) 1);
real theta = acos(cosine);
COMPUTE_FORCE
float4 force1 = cross(v0, cp)*(dEdAngle/(r21*rp));
float4 force3 = cross(cp, v1)*(dEdAngle/(r23*rp));
float4 force2 = -force1-force3;
real4 force1 = cross(v0, cp)*(dEdAngle/(r21*rp));
real4 force3 = cross(cp, v1)*(dEdAngle/(r23*rp));
real4 force2 = -force1-force3;
float4 delta = pos2-pos1;
float r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
real4 delta = pos2-pos1;
real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE
dEdR = (r > 0.0f) ? (dEdR / r) : 0.0f;
delta.xyz *= dEdR;
float4 force1 = delta;
float4 force2 = -delta;
\ No newline at end of file
real4 force1 = delta;
real4 force2 = -delta;
\ No newline at end of file
const float PI = 3.14159265358979323846f;
const real PI = 3.14159265358979323846f;
// Compute the first angle.
float4 v0a = (float4) (pos1.xyz-pos2.xyz, 0.0f);
float4 v1a = (float4) (pos3.xyz-pos2.xyz, 0.0f);
float4 v2a = (float4) (pos3.xyz-pos4.xyz, 0.0f);
float4 cp0a = cross(v0a, v1a);
float4 cp1a = cross(v1a, v2a);
float cosangle = dot(normalize(cp0a), normalize(cp1a));
float angleA;
real4 v0a = (real4) (pos1.xyz-pos2.xyz, 0.0f);
real4 v1a = (real4) (pos3.xyz-pos2.xyz, 0.0f);
real4 v2a = (real4) (pos3.xyz-pos4.xyz, 0.0f);
real4 cp0a = cross(v0a, v1a);
real4 cp1a = cross(v1a, v2a);
real cosangle = dot(normalize(cp0a), normalize(cp1a));
real angleA;
if (cosangle > 0.99f || cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float4 cross_prod = cross(cp0a, cp1a);
float scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
real4 cross_prod = cross(cp0a, cp1a);
real scale = dot(cp0a, cp0a)*dot(cp1a, cp1a);
angleA = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0.0f)
angleA = PI-angleA;
......@@ -25,18 +25,18 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
// Compute the second angle.
float4 v0b = (float4) (pos5.xyz-pos6.xyz, 0.0f);
float4 v1b = (float4) (pos7.xyz-pos6.xyz, 0.0f);
float4 v2b = (float4) (pos7.xyz-pos8.xyz, 0.0f);
float4 cp0b = cross(v0b, v1b);
float4 cp1b = cross(v1b, v2b);
real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f);
real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f);
real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f);
real4 cp0b = cross(v0b, v1b);
real4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
float angleB;
real angleB;
if (cosangle > 0.99f || cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float4 cross_prod = cross(cp0b, cp1b);
float scale = dot(cp0b, cp0b)*dot(cp1b, cp1b);
real4 cross_prod = cross(cp0b, cp1b);
real scale = dot(cp0b, cp0b)*dot(cp1b, cp1b);
angleB = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0.0f)
angleB = PI-angleB;
......@@ -50,7 +50,7 @@ angleB = fmod(angleB+2.0f*PI, 2.0f*PI);
int2 pos = MAP_POS[MAPS[index]];
int size = pos.y;
float delta = 2*PI/size;
real delta = 2*PI/size;
int s = (int) (angleA/delta);
int t = (int) (angleB/delta);
float4 c[4];
......@@ -59,14 +59,14 @@ c[0] = COEFF[coeffIndex];
c[1] = COEFF[coeffIndex+1];
c[2] = COEFF[coeffIndex+2];
c[3] = COEFF[coeffIndex+3];
float da = angleA/delta-s;
float db = angleB/delta-t;
real da = angleA/delta-s;
real db = angleB/delta-t;
// Evaluate the spline to determine the energy and gradients.
float torsionEnergy = 0.0f;
float dEdA = 0.0f;
float dEdB = 0.0f;
real torsionEnergy = 0.0f;
real dEdA = 0.0f;
real dEdB = 0.0f;
torsionEnergy = da*torsionEnergy + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;
dEdA = db*dEdA + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;
dEdB = da*dEdB + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;
......@@ -85,17 +85,17 @@ energy += torsionEnergy;
// Apply the force to the first torsion.
float normCross1 = dot(cp0a, cp0a);
float normSqrBC = dot(v1a, v1a);
float normBC = SQRT(normSqrBC);
float normCross2 = dot(cp1a, cp1a);
float dp = 1.0f/normSqrBC;
float4 ff = (float4) ((-dEdA*normBC)/normCross1, dot(v0a, v1a)*dp, dot(v2a, v1a)*dp, (dEdA*normBC)/normCross2);
float4 force1 = ff.x*cp0a;
float4 force4 = ff.w*cp1a;
float4 d = ff.y*force1 - ff.z*force4;
float4 force2 = d-force1;
float4 force3 = -d-force4;
real normCross1 = dot(cp0a, cp0a);
real normSqrBC = dot(v1a, v1a);
real normBC = SQRT(normSqrBC);
real normCross2 = dot(cp1a, cp1a);
real dp = 1.0f/normSqrBC;
real4 ff = (real4) ((-dEdA*normBC)/normCross1, dot(v0a, v1a)*dp, dot(v2a, v1a)*dp, (dEdA*normBC)/normCross2);
real4 force1 = ff.x*cp0a;
real4 force4 = ff.w*cp1a;
real4 d = ff.y*force1 - ff.z*force4;
real4 force2 = d-force1;
real4 force3 = -d-force4;
// Apply the force to the second torsion.
......@@ -104,9 +104,9 @@ normSqrBC = dot(v1b, v1b);
normBC = SQRT(normSqrBC);
normCross2 = dot(cp1b, cp1b);
dp = 1.0f/normSqrBC;
ff = (float4) ((-dEdB*normBC)/normCross1, dot(v0b, v1b)*dp, dot(v2b, v1b)*dp, (dEdB*normBC)/normCross2);
float4 force5 = ff.x*cp0b;
float4 force8 = ff.w*cp1b;
ff = (real4) ((-dEdB*normBC)/normCross1, dot(v0b, v1b)*dp, dot(v2b, v1b)*dp, (dEdB*normBC)/normCross2);
real4 force5 = ff.x*cp0b;
real4 force8 = ff.w*cp1b;
d = ff.y*force5 - ff.z*force8;
float4 force6 = d-force5;
float4 force7 = -d-force8;
real4 force6 = d-force5;
real4 force7 = -d-force8;
#if USE_EWALD
bool needCorrection = isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS;
if (!isExcluded || needCorrection) {
float tempForce = 0.0f;
real tempForce = 0;
if (r2 < CUTOFF_SQUARED || needCorrection) {
const float alphaR = EWALD_ALPHA*r;
const float expAlphaRSqr = EXP(-alphaR*alphaR);
const float prefactor = 138.935456f*posq1.w*posq2.w*invR;
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
const real prefactor = 138.935456f*posq1.w*posq2.w*invR;
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
float t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*alphaR)*alphaR)*alphaR)*alphaR)*alphaR)*alphaR;
real t = 1.0f+(0.0705230784f+(0.0422820123f+(0.0092705272f+(0.0001520143f+(0.0002765672f+0.0000430638f*alphaR)*alphaR)*alphaR)*alphaR)*alphaR)*alphaR;
t *= t;
t *= t;
t *= t;
const float erfcAlphaR = RECIP(t*t);
const real erfcAlphaR = RECIP(t*t);
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
......@@ -24,11 +24,11 @@ if (!isExcluded || needCorrection) {
}
else {
#if HAS_LENNARD_JONES
float sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
float sig2 = invR*sig;
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
real sig2 = invR*sig;
sig2 *= sig2;
float sig6 = sig2*sig2*sig2;
float epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f) + prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += epssig6*(sig6 - 1.0f) + prefactor*erfcAlphaR;
#else
......@@ -41,32 +41,37 @@ if (!isExcluded || needCorrection) {
}
#else
{
#ifdef USE_DOUBLE_PRECISION
unsigned long includeInteraction;
#else
unsigned int includeInteraction;
#endif
#ifdef USE_CUTOFF
unsigned int includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
#else
unsigned int includeInteraction = (!isExcluded);
includeInteraction = (!isExcluded);
#endif
float tempForce = 0.0f;
real tempForce = 0;
#if HAS_LENNARD_JONES
float sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
float sig2 = invR*sig;
real sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
real sig2 = invR*sig;
sig2 *= sig2;
float sig6 = sig2*sig2*sig2;
float epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f);
tempEnergy += select(0.0f, epssig6*(sig6 - 1.0f), includeInteraction);
tempEnergy += select((real) 0, epssig6*(sig6-1), includeInteraction);
#endif
#if HAS_COULOMB
#ifdef USE_CUTOFF
const float prefactor = 138.935456f*posq1.w*posq2.w;
const real prefactor = 138.935456f*posq1.w*posq2.w;
tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2);
tempEnergy += select(0.0f, prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C), includeInteraction);
tempEnergy += select((real) 0, prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C), includeInteraction);
#else
const float prefactor = 138.935456f*posq1.w*posq2.w*invR;
const real prefactor = 138.935456f*posq1.w*posq2.w*invR;
tempForce += prefactor;
tempEnergy += select(0.0f, prefactor, includeInteraction);
tempEnergy += select((real) 0, prefactor, includeInteraction);
#endif
#endif
dEdR += select(0.0f, tempForce*invR*invR, includeInteraction);
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
}
#endif
\ No newline at end of file
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
float4 ccb_delta(float4 vec1, float4 vec2) {
float4 result = (float4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
real4 ccb_delta(real4 vec1, real4 vec2) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
......@@ -10,17 +10,17 @@ float4 ccb_delta(float4 vec1, float4 vec2) {
/**
* Compute the angle between two vectors. The w component of each vector should contain the squared magnitude.
*/
float ccb_computeAngle(float4 vec1, float4 vec2) {
float dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
float cosine = dotProduct*RSQRT(vec1.w*vec2.w);
float angle;
real ccb_computeAngle(real4 vec1, real4 vec2) {
real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
float4 crossProduct = cross(vec1, vec2);
float scale = vec1.w*vec2.w;
real4 crossProduct = cross(vec1, vec2);
real scale = vec1.w*vec2.w;
angle = asin(SQRT(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f)
if (cosine < 0)
angle = M_PI-angle;
}
else
......@@ -31,8 +31,8 @@ float ccb_computeAngle(float4 vec1, float4 vec2) {
/**
* Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
*/
float4 ccb_computeCross(float4 vec1, float4 vec2) {
float4 result = cross(vec1, vec2);
real4 ccb_computeCross(real4 vec1, real4 vec2) {
real4 result = cross(vec1, vec2);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
COMPUTE_FORCE
float4 force1 = (float4) (-dEdX, -dEdY, -dEdZ, 0.0f);
real4 force1 = (real4) (-dEdX, -dEdY, -dEdZ, 0);
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment