/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2010 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "OpenCLKernels.h"
#include "OpenCLForceInfo.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/Context.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLKernelSources.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "openmm/internal/MSVC_erfc.h"
#include
#include
using namespace OpenMM;
using namespace std;
static string doubleToString(double value) {
stringstream s;
s.precision(8);
s << scientific << value << "f";
return s.str();
}
static string intToString(int value) {
stringstream s;
s << value;
return s.str();
}
static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
const Lepton::Operation& op = expression.getRootNode().getOperation();
if (op.getId() != Lepton::Operation::CONSTANT)
return false;
return (dynamic_cast(op).getValue() == 0.0);
}
void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0) {
cl.reorderAtoms();
cl.getNonbondedUtilities().updateNeighborListSize();
}
cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearAutoclearBuffers();
cl.getNonbondedUtilities().prepareInteractions();
}
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
cl.getNonbondedUtilities().computeInteractions();
if (includeForces)
cl.reduceForces();
double sum = 0.0f;
if (includeEnergy) {
OpenCLArray& energy = cl.getEnergyBuffer();
energy.download();
for (int i = 0; i < energy.getSize(); i++)
sum += energy[i];
}
return sum;
}
void OpenCLUpdateStateDataKernel::initialize(const System& system) {
}
double OpenCLUpdateStateDataKernel::getTime(const ContextImpl& context) const {
return cl.getTime();
}
void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
vector& contexts = cl.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setTime(time);
}
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector& positions) {
OpenCLArray& posq = cl.getPosq();
posq.download();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
mm_float4 periodicBoxSize = cl.getPeriodicBoxSize();
for (int i = 0; i < numParticles; ++i) {
mm_float4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
}
}
void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector& positions) {
OpenCLArray& posq = cl.getPosq();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) {
mm_float4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = (cl_float) p[0];
pos.y = (cl_float) p[1];
pos.z = (cl_float) p[2];
}
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
posq[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f);
posq.upload();
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
}
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector& velocities) {
OpenCLArray& velm = cl.getVelm();
velm.download();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
mm_float4 vel = velm[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector& velocities) {
OpenCLArray& velm = cl.getVelm();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) {
mm_float4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = (cl_float) p[0];
vel.y = (cl_float) p[1];
vel.z = (cl_float) p[2];
}
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
velm[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f);
velm.upload();
}
void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, std::vector& forces) {
OpenCLArray& force = cl.getForce();
force.download();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
forces.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
mm_float4 f = force[i];
forces[order[i]] = Vec3(f.x, f.y, f.z);
}
}
void OpenCLUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
mm_float4 box = cl.getPeriodicBoxSize();
a = Vec3(box.x, 0, 0);
b = Vec3(0, box.y, 0);
c = Vec3(0, 0, box.z);
}
void OpenCLUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
vector& contexts = cl.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(a[0], b[1], c[2]);
}
void OpenCLApplyConstraintsKernel::initialize(const System& system) {
}
void OpenCLApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
cl.getIntegrationUtilities().applyConstraints(tol);
}
class OpenCLBondForceInfo : public OpenCLForceInfo {
public:
OpenCLBondForceInfo(int requiredBuffers, const HarmonicBondForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
double length, k;
force.getBondParameters(index, particle1, particle2, length, k);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double length1, length2, k1, k2;
force.getBondParameters(group1, particle1, particle2, length1, k1);
force.getBondParameters(group2, particle1, particle2, length2, k2);
return (length1 == length2 && k1 == k2);
}
private:
const HarmonicBondForce& force;
};
OpenCLCalcHarmonicBondForceKernel::~OpenCLCalcHarmonicBondForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
}
void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
params = new OpenCLArray(cl, numBonds, "bondParams");
indices = new OpenCLArray(cl, numBonds, "bondIndices");
vector forceBufferCounter(system.getNumParticles(), 0);
vector paramVector(numBonds);
vector indicesVector(numBonds);
for (int i = 0; i < numBonds; i++) {
int particle1, particle2;
double length, k;
force.getBondParameters(startIndex+i, particle1, particle2, length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
indicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++);
}
params->upload(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLBondForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::harmonicBondForce);
kernel = cl::Kernel(program, "calcHarmonicBondForce");
}
double OpenCLCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numBonds);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, params->getDeviceBuffer());
kernel.setArg(6, indices->getDeviceBuffer());
}
cl.executeKernel(kernel, numBonds);
return 0.0;
}
class OpenCLCustomBondForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomBondForceInfo(int requiredBuffers, const CustomBondForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
vector parameters;
force.getBondParameters(index, particle1, particle2, parameters);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
vector parameters1, parameters2;
force.getBondParameters(group1, particle1, particle2, parameters1);
force.getBondParameters(group2, particle1, particle2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomBondForce& force;
};
OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customBondParams");
indices = new OpenCLArray(cl, numBonds, "customBondIndices");
string extraArguments;
if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray(cl, force.getNumGlobalParameters(), "customBondGlobals", false, CL_MEM_READ_ONLY);
extraArguments += ", __constant float* globals";
}
vector forceBufferCounter(system.getNumParticles(), 0);
vector > paramVector(numBonds);
vector indicesVector(numBonds);
for (int i = 0; i < numBonds; i++) {
int particle1, particle2;
vector parameters;
force.getBondParameters(startIndex+i, particle1, particle2, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
indicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++);
}
params->setParameterValues(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLCustomBondForceInfo(maxBuffers, force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdR = "] = forceExpression;
// Create the kernels.
map variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
variables[name] = value;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArguments += ", __global "+buffer.getType()+"* "+buffer.getName();
compute< > functions;
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
replacements["EXTRA_ARGUMENTS"] = extraArguments;
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customBondForce, replacements));
kernel = cl::Kernel(program, "computeCustomBondForces");
}
double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numBonds);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, indices->getDeviceBuffer());
int nextIndex = 6;
if (globals != NULL)
kernel.setArg(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg(nextIndex++, buffer.getMemory());
}
}
cl.executeKernel(kernel, numBonds);
return 0.0;
}
class OpenCLAngleForceInfo : public OpenCLForceInfo {
public:
OpenCLAngleForceInfo(int requiredBuffers, const HarmonicAngleForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(index, particle1, particle2, particle3, angle, k);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
double angle1, angle2, k1, k2;
force.getAngleParameters(group1, particle1, particle2, particle3, angle1, k1);
force.getAngleParameters(group2, particle1, particle2, particle3, angle2, k2);
return (angle1 == angle2 && k1 == k2);
}
private:
const HarmonicAngleForce& force;
};
OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
}
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
params = new OpenCLArray(cl, numAngles, "angleParams");
indices = new OpenCLArray(cl, numAngles, "angleIndices");
vector forceBufferCounter(system.getNumParticles(), 0);
vector paramVector(numAngles);
vector indicesVector(numAngles);
for (int i = 0; i < numAngles; i++) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(startIndex+i, particle1, particle2, particle3, angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
indicesVector[i] = mm_int8(particle1, particle2, particle3,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, 0, 0);
}
params->upload(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLAngleForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::harmonicAngleForce);
kernel = cl::Kernel(program, "calcHarmonicAngleForce");
}
double OpenCLCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numAngles == 0)
return 0.0;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numAngles);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, params->getDeviceBuffer());
kernel.setArg(6, indices->getDeviceBuffer());
}
cl.executeKernel(kernel, numAngles);
return 0.0;
}
class OpenCLCustomAngleForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomAngleForceInfo(int requiredBuffers, const CustomAngleForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2, particle3;
vector parameters;
force.getAngleParameters(index, particle1, particle2, particle3, parameters);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
vector parameters1, parameters2;
force.getAngleParameters(group1, particle1, particle2, particle3, parameters1);
force.getAngleParameters(group2, particle1, particle2, particle3, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomAngleForce& force;
};
OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
params = new OpenCLParameterSet(cl, force.getNumPerAngleParameters(), numAngles, "customAngleParams");
indices = new OpenCLArray(cl, numAngles, "customAngleIndices");
string extraArguments;
if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray(cl, force.getNumGlobalParameters(), "customAngleGlobals", false, CL_MEM_READ_ONLY);
extraArguments += ", __constant float* globals";
}
vector forceBufferCounter(system.getNumParticles(), 0);
vector > paramVector(numAngles);
vector indicesVector(numAngles);
for (int i = 0; i < numAngles; i++) {
int particle1, particle2, particle3;
vector parameters;
force.getAngleParameters(startIndex+i, particle1, particle2, particle3, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
indicesVector[i] = mm_int8(particle1, particle2, particle3, forceBufferCounter[particle1]++,
forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, 0, 0);
}
params->setParameterValues(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLCustomAngleForceInfo(maxBuffers, force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerAngleParameters(); i++) {
const string& name = force.getPerAngleParameterName(i);
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
variables[name] = value;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArguments += ", __global "+buffer.getType()+"* "+buffer.getName();
compute< > functions;
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
replacements["EXTRA_ARGUMENTS"] = extraArguments;
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customAngleForce, replacements));
kernel = cl::Kernel(program, "computeCustomAngleForces");
}
double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numAngles == 0)
return 0.0;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numAngles);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, indices->getDeviceBuffer());
int nextIndex = 6;
if (globals != NULL)
kernel.setArg(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg(nextIndex++, buffer.getMemory());
}
}
cl.executeKernel(kernel, numAngles);
return 0.0;
}
class OpenCLPeriodicTorsionForceInfo : public OpenCLForceInfo {
public:
OpenCLPeriodicTorsionForceInfo(int requiredBuffers, const PeriodicTorsionForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, periodicity, phase, k);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4, periodicity1, periodicity2;
double phase1, phase2, k1, k2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, periodicity1, phase1, k1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, periodicity2, phase2, k2);
return (periodicity1 == periodicity2 && phase1 == phase2 && k1 == k2);
}
private:
const PeriodicTorsionForce& force;
};
OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
}
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
params = new OpenCLArray(cl, numTorsions, "periodicTorsionParams");
indices = new OpenCLArray(cl, numTorsions, "periodicTorsionIndices");
vector forceBufferCounter(system.getNumParticles(), 0);
vector paramVector(numTorsions);
vector indicesVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
}
params->upload(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLPeriodicTorsionForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::periodicTorsionForce);
kernel = cl::Kernel(program, "calcPeriodicTorsionForce");
}
double OpenCLCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numTorsions == 0)
return 0.0;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numTorsions);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, params->getDeviceBuffer());
kernel.setArg(6, indices->getDeviceBuffer());
}
cl.executeKernel(kernel, numTorsions);
return 0.0;
}
class OpenCLRBTorsionForceInfo : public OpenCLForceInfo {
public:
OpenCLRBTorsionForceInfo(int requiredBuffers, const RBTorsionForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
double c0a, c0b, c1a, c1b, c2a, c2b, c3a, c3b, c4a, c4b, c5a, c5b;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, c0a, c1a, c2a, c3a, c4a, c5a);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, c0b, c1b, c2b, c3b, c4b, c5b);
return (c0a == c0b && c1a == c1b && c2a == c2b && c3a == c3b && c4a == c4b && c5a == c5b);
}
private:
const RBTorsionForce& force;
};
OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
}
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
params = new OpenCLArray(cl, numTorsions, "rbTorsionParams");
indices = new OpenCLArray(cl, numTorsions, "rbTorsionIndices");
vector forceBufferCounter(system.getNumParticles(), 0);
vector paramVector(numTorsions);
vector indicesVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
}
params->upload(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLRBTorsionForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::rbTorsionForce);
kernel = cl::Kernel(program, "calcRBTorsionForce");
}
double OpenCLCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numTorsions == 0)
return 0.0;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numTorsions);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, params->getDeviceBuffer());
kernel.setArg(6, indices->getDeviceBuffer());
}
cl.executeKernel(kernel, numTorsions);
return 0.0;
}
class OpenCLCMAPTorsionForceInfo : public OpenCLForceInfo {
public:
OpenCLCMAPTorsionForceInfo(int requiredBuffers, const CMAPTorsionForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int map, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(index, map, a1, a2, a3, a4, b1, b2, b3, b4);
particles.resize(8);
particles[0] = a1;
particles[1] = a2;
particles[2] = a3;
particles[3] = a4;
particles[4] = b1;
particles[5] = b2;
particles[6] = b3;
particles[7] = b4;
}
bool areGroupsIdentical(int group1, int group2) {
int map1, map2, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(group1, map1, a1, a2, a3, a4, b1, b2, b3, b4);
force.getTorsionParameters(group2, map2, a1, a2, a3, a4, b1, b2, b3, b4);
return (map1 == map2);
}
private:
const CMAPTorsionForce& force;
};
OpenCLCalcCMAPTorsionForceKernel::~OpenCLCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
if (torsionIndices != NULL)
delete torsionIndices;
}
void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
int numMaps = force.getNumMaps();
vector coeffVec;
vector mapPositionsVec(numMaps);
vector energy;
vector > c;
int currentPosition = 0;
for (int i = 0; i < numMaps; i++) {
int size;
force.getMapParameters(i, size, energy);
CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
mapPositionsVec[i] = mm_int2(currentPosition, size);
currentPosition += 4*size*size;
for (int j = 0; j < size*size; j++) {
coeffVec.push_back(mm_float4((float) c[j][0], (float) c[j][1], (float) c[j][2], (float) c[j][3]));
coeffVec.push_back(mm_float4((float) c[j][4], (float) c[j][5], (float) c[j][6], (float) c[j][7]));
coeffVec.push_back(mm_float4((float) c[j][8], (float) c[j][9], (float) c[j][10], (float) c[j][11]));
coeffVec.push_back(mm_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
vector forceBufferCounter(system.getNumParticles(), 0);
vector torsionMapsVec(numTorsions);
vector torsionIndicesVec(numTorsions);
for (int i = 0; i < numTorsions; i++) {
mm_int16& ind = torsionIndicesVec[i];
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], ind.s0, ind.s1, ind.s2, ind.s3, ind.s4, ind.s5, ind.s6, ind.s7);
ind.s8 = forceBufferCounter[ind.s0]++;
ind.s9 = forceBufferCounter[ind.s1]++;
ind.s10 = forceBufferCounter[ind.s2]++;
ind.s11 = forceBufferCounter[ind.s3]++;
ind.s12 = forceBufferCounter[ind.s4]++;
ind.s13 = forceBufferCounter[ind.s5]++;
ind.s14 = forceBufferCounter[ind.s6]++;
ind.s15 = forceBufferCounter[ind.s7]++;
}
coefficients = new OpenCLArray(cl, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = new OpenCLArray(cl, numMaps, "cmapTorsionMapPositions");
torsionMaps = new OpenCLArray(cl, numTorsions, "cmapTorsionMaps");
torsionIndices = new OpenCLArray(cl, numTorsions, "cmapTorsionIndices");
coefficients->upload(coeffVec);
mapPositions->upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec);
torsionIndices->upload(torsionIndicesVec);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLCMAPTorsionForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::cmapTorsionForce);
kernel = cl::Kernel(program, "computeCMAPTorsionForces");
}
double OpenCLCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numTorsions == 0)
return 0.0;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numTorsions);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, coefficients->getDeviceBuffer());
kernel.setArg(6, mapPositions->getDeviceBuffer());
kernel.setArg(7, torsionIndices->getDeviceBuffer());
kernel.setArg(8, torsionMaps->getDeviceBuffer());
}
cl.executeKernel(kernel, numTorsions);
return 0.0;
}
class OpenCLCustomTorsionForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomTorsionForceInfo(int requiredBuffers, const CustomTorsionForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2, particle3, particle4;
vector parameters;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, parameters);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
vector parameters1, parameters2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, parameters1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomTorsionForce& force;
};
OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
params = new OpenCLParameterSet(cl, force.getNumPerTorsionParameters(), numTorsions, "customTorsionParams");
indices = new OpenCLArray(cl, numTorsions, "customTorsionIndices");
string extraArguments;
if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray(cl, force.getNumGlobalParameters(), "customTorsionGlobals", false, CL_MEM_READ_ONLY);
extraArguments += ", __constant float* globals";
}
vector forceBufferCounter(system.getNumParticles(), 0);
vector > paramVector(numTorsions);
vector indicesVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4;
vector parameters;
force.getTorsionParameters(startIndex+i, particle1, particle2, particle3, particle4, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4, forceBufferCounter[particle1]++,
forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
}
params->setParameterValues(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLCustomTorsionForceInfo(maxBuffers, force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerTorsionParameters(); i++) {
const string& name = force.getPerTorsionParameterName(i);
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
variables[name] = value;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArguments += ", __global "+buffer.getType()+"* "+buffer.getName();
compute< > functions;
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
replacements["EXTRA_ARGUMENTS"] = extraArguments;
replacements["M_PI"] = doubleToString(M_PI);
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customTorsionForce, replacements));
kernel = cl::Kernel(program, "computeCustomTorsionForces");
}
double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numTorsions == 0)
return 0.0;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numTorsions);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, indices->getDeviceBuffer());
int nextIndex = 6;
if (globals != NULL)
kernel.setArg(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg(nextIndex++, buffer.getMemory());
}
}
cl.executeKernel(kernel, numTorsions);
return 0.0;
}
class OpenCLNonbondedForceInfo : public OpenCLForceInfo {
public:
OpenCLNonbondedForceInfo(int requiredBuffers, const NonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
int getNumParticleGroups() {
return force.getNumExceptions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
private:
const NonbondedForce& force;
};
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (exceptionIndices != NULL)
delete exceptionIndices;
if (cosSinSums != NULL)
delete cosSinSums;
if (pmeGrid != NULL)
delete pmeGrid;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeBsplineTheta != NULL)
delete pmeBsplineTheta;
if (pmeBsplineDtheta != NULL)
delete pmeBsplineDtheta;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (erfcTable != NULL)
delete erfcTable;
if (sort != NULL)
delete sort;
if (fft != NULL)
delete fft;
}
void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
// Identify which exceptions are 1-4 interactions.
vector > exclusions;
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0)
exceptions.push_back(i);
}
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
sigmaEpsilon = new OpenCLArray(cl, numParticles, "sigmaEpsilon");
OpenCLArray& posq = cl.getPosq();
vector sigmaEpsilonVector(numParticles);
vector > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
bool hasCoulomb = false;
bool hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
posq[i].w = (float) charge;
sigmaEpsilonVector[i] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (int i = 0; i < (int) exclusions.size(); i++) {
exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first);
}
posq.upload();
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
map defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
if (useCutoff) {
// Compute the reaction field constants.
double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC);
}
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
double alpha = 0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI) : 0.0);
// Create the reciprocal space kernels.
map replacements;
replacements["NUM_ATOMS"] = intToString(numParticles);
replacements["KMAX_X"] = intToString(kmaxx);
replacements["KMAX_Y"] = intToString(kmaxy);
replacements["KMAX_Z"] = intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = doubleToString(-1.0/(4.0*alpha*alpha));
cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), "cosSinSums");
}
else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters.
int gridSizeX, gridSizeY, gridSizeZ;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
gridSizeY = OpenCLFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = OpenCLFFT3D::findLegalDimension(gridSizeZ);
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI) : 0.0);
pmeDefines["PME_ORDER"] = intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = intToString(numParticles);
pmeDefines["RECIP_EXP_FACTOR"] = doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = doubleToString(std::sqrt(ONE_4PI_EPS0));
// Create required data structures.
pmeGrid = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid");
pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, "pmeBsplineModuliX");
pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, "pmeBsplineModuliZ");
pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, "pmeBsplineTheta");
pmeBsplineDtheta = new OpenCLArray(cl, PmeOrder*numParticles, "pmeBsplineDtheta");
pmeAtomRange = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = new OpenCLArray(cl, numParticles, "pmeAtomGridIndex");
sort = new OpenCLSort(cl, cl.getNumAtoms(), "int2", "value.y");
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ);
// Initialize the b-spline moduli.
int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
vector data(PmeOrder);
vector ddata(PmeOrder);
vector bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
vector moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = (float) (sc*sc+ss*ss);
}
for (int i = 0; i < ndata; i++)
{
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
}
if (dim == 0)
pmeBsplineModuliX->upload(moduli);
else if (dim == 1)
pmeBsplineModuliY->upload(moduli);
else
pmeBsplineModuliZ->upload(moduli);
}
}
else
ewaldSelfEnergy = 0.0;
// Tabulate values of erfc().
if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
const int tableSize = 2048;
defines["ERFC_TABLE_SCALE"] = doubleToString((tableSize-1)/(alpha*force.getCutoffDistance()));
erfcTable = new OpenCLArray(cl, tableSize, "ErfcTable", false, CL_MEM_READ_ONLY);
vector erfcVector(tableSize);
for (int i = 0; i < tableSize; ++i)
erfcVector[i] = (float) erfc(i*(alpha*force.getCutoffDistance())/(tableSize-1));
erfcTable->upload(erfcVector);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo("erfcTable", "float", 1, sizeof(cl_float), erfcTable->getDeviceBuffer()));
}
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source);
if (hasLJ)
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon->getDeviceBuffer()));
// Initialize the exceptions.
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
int maxBuffers = cl.getNonbondedUtilities().getNumForceBuffers();
if (numExceptions > 0) {
exceptionParams = new OpenCLArray(cl, numExceptions, "exceptionParams");
exceptionIndices = new OpenCLArray(cl, numExceptions, "exceptionIndices");
vector exceptionParamsVector(numExceptions);
vector exceptionIndicesVector(numExceptions);
vector forceBufferCounter(system.getNumParticles(), 0);
for (int i = 0; i < numExceptions; i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], particle1, particle2, chargeProd, sigma, epsilon);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionIndicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++);
}
exceptionParams->upload(exceptionParamsVector);
exceptionIndices->upload(exceptionIndicesVector);
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
}
cl.addForce(new OpenCLNonbondedForceInfo(maxBuffers, force));
defines.clear();
defines["NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
defines["NUM_EXCEPTIONS"] = intToString(numExceptions);
cl::Program program = cl.createProgram(OpenCLKernelSources::nonbondedExceptions, defines);
exceptionsKernel = cl::Kernel(program, "computeNonbondedExceptions");
}
double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
bool deviceIsCpu = (cl.getDevice().getInfo() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) {
hasInitializedKernel = true;
if (exceptionIndices != NULL) {
exceptionsKernel.setArg(0, cl.getForceBuffers().getDeviceBuffer());
exceptionsKernel.setArg(1, cl.getEnergyBuffer().getDeviceBuffer());
exceptionsKernel.setArg(2, cl.getPosq().getDeviceBuffer());
exceptionsKernel.setArg(3, exceptionParams->getDeviceBuffer());
exceptionsKernel.setArg(4, exceptionIndices->getDeviceBuffer());
}
if (cosSinSums != NULL) {
ewaldSumsKernel.setArg(0, cl.getEnergyBuffer().getDeviceBuffer());
ewaldSumsKernel.setArg(1, cl.getPosq().getDeviceBuffer());
ewaldSumsKernel.setArg(2, cosSinSums->getDeviceBuffer());
ewaldForcesKernel.setArg(0, cl.getForceBuffers().getDeviceBuffer());
ewaldForcesKernel.setArg(1, cl.getPosq().getDeviceBuffer());
ewaldForcesKernel.setArg(2, cosSinSums->getDeviceBuffer());
}
if (pmeGrid != NULL) {
string file = (deviceIsCpu ? OpenCLKernelSources::pme_cpu : OpenCLKernelSources::pme);
cl::Program program = cl.createProgram(file, pmeDefines);
pmeUpdateBsplinesKernel = cl::Kernel(program, "updateBsplines");
pmeAtomRangeKernel = cl::Kernel(program, "findAtomRangeForGrid");
pmeSpreadChargeKernel = cl::Kernel(program, "gridSpreadCharge");
pmeConvolutionKernel = cl::Kernel(program, "reciprocalConvolution");
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
pmeUpdateBsplinesKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(1, pmeBsplineTheta->getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(2, pmeBsplineDtheta->getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(3, 2*OpenCLContext::ThreadBlockSize*PmeOrder*sizeof(mm_float4), NULL);
pmeUpdateBsplinesKernel.setArg(4, pmeAtomGridIndex->getDeviceBuffer());
pmeAtomRangeKernel.setArg(0, pmeAtomGridIndex->getDeviceBuffer());
pmeAtomRangeKernel.setArg(1, pmeAtomRange->getDeviceBuffer());
pmeAtomRangeKernel.setArg(2, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg(1, pmeAtomGridIndex->getDeviceBuffer());
pmeSpreadChargeKernel.setArg(2, pmeAtomRange->getDeviceBuffer());
pmeSpreadChargeKernel.setArg(3, pmeGrid->getDeviceBuffer());
pmeSpreadChargeKernel.setArg(4, pmeBsplineTheta->getDeviceBuffer());
pmeConvolutionKernel.setArg(0, pmeGrid->getDeviceBuffer());
pmeConvolutionKernel.setArg(1, cl.getEnergyBuffer().getDeviceBuffer());
pmeConvolutionKernel.setArg(2, pmeBsplineModuliX->getDeviceBuffer());
pmeConvolutionKernel.setArg(3, pmeBsplineModuliY->getDeviceBuffer());
pmeConvolutionKernel.setArg(4, pmeBsplineModuliZ->getDeviceBuffer());
pmeInterpolateForceKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg(2, pmeBsplineTheta->getDeviceBuffer());
pmeInterpolateForceKernel.setArg(3, pmeBsplineDtheta->getDeviceBuffer());
pmeInterpolateForceKernel.setArg(4, pmeGrid->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg(0, pmeGrid->getDeviceBuffer());
}
}
}
if (exceptionIndices != NULL)
cl.executeKernel(exceptionsKernel, exceptionIndices->getSize());
if (cosSinSums != NULL && cl.getContextIndex() == 0) {
mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_float4 recipBoxSize = mm_float4((float) (2*M_PI/boxSize.x), (float) (2*M_PI/boxSize.y), (float) (2*M_PI/boxSize.z), 0);
float recipCoefficient = (float) (ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z));
ewaldSumsKernel.setArg(3, recipBoxSize);
ewaldSumsKernel.setArg(4, recipCoefficient);
cl.executeKernel(ewaldSumsKernel, cosSinSums->getSize());
ewaldForcesKernel.setArg(3, recipBoxSize);
ewaldForcesKernel.setArg(4, recipCoefficient);
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
}
if (pmeGrid != NULL && cl.getContextIndex() == 0) {
mm_float4 boxSize = cl.getPeriodicBoxSize();
mm_float4 invBoxSize = cl.getInvPeriodicBoxSize();
pmeUpdateBsplinesKernel.setArg(5, boxSize);
pmeUpdateBsplinesKernel.setArg(6, invBoxSize);
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu) {
pmeSpreadChargeKernel.setArg(5, boxSize);
pmeSpreadChargeKernel.setArg(6, invBoxSize);
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo(), 1);
}
else {
sort->sort(*pmeAtomGridIndex);
pmeAtomRangeKernel.setArg(3, boxSize);
pmeAtomRangeKernel.setArg(4, invBoxSize);
cl.executeKernel(pmeAtomRangeKernel, cl.getNumAtoms());
if (cl.getSupports64BitGlobalAtomics()) {
cl.clearBuffer(pmeGrid->getDeviceBuffer(), pmeGrid->getSize()*2);
pmeSpreadChargeKernel.setArg(5, boxSize);
pmeSpreadChargeKernel.setArg(6, invBoxSize);
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms(), PmeOrder*PmeOrder*PmeOrder);
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
}
else
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
}
fft->execFFT(*pmeGrid, true);
pmeConvolutionKernel.setArg(5, invBoxSize);
pmeConvolutionKernel.setArg(6, (float) (1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z)));
cl.executeKernel(pmeConvolutionKernel, cl.getNumAtoms());
fft->execFFT(*pmeGrid, false);
pmeInterpolateForceKernel.setArg(5, boxSize);
pmeInterpolateForceKernel.setArg(6, invBoxSize);
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms());
}
double energy = ewaldSelfEnergy;
if (dispersionCoefficient != 0.0) {
mm_float4 boxSize = cl.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
}
return energy;
}
class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector params1;
vector params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomNonbondedForce& force;
};
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+intToString(forceIndex)+"_";
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
globals = new OpenCLArray(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", false, CL_MEM_READ_ONLY);
vector > paramVector(numParticles);
vector > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
vector parameters;
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
exclusionList[i].push_back(i);
}
for (int i = 0; i < force.getNumExclusions(); i++) {
int particle1, particle2;
force.getExclusionParticles(i, particle1, particle2);
exclusionList[particle1].push_back(particle2);
exclusionList[particle2].push_back(particle1);
}
params->setParameterValues(paramVector);
// Record the tabulated functions.
OpenCLExpressionUtilities::FunctionPlaceholder fp;
map functions;
vector > functionDefinitions;
vector tabulatedFunctionParamsVec(force.getNumFunctions());
for (int i = 0; i < force.getNumFunctions(); i++) {
string name;
vector values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
string arrayName = prefix+"table"+intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(new OpenCLArray(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer()));
}
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map