/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "OpenCLKernels.h"
#include "OpenCLForceInfo.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/Context.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLKernelSources.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include
using namespace OpenMM;
using namespace std;
static string doubleToString(double value) {
stringstream s;
s.precision(8);
s << scientific << value << "f";
return s.str();
}
static string intToString(int value) {
stringstream s;
s << value;
return s.str();
}
void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context) {
if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0)
cl.reorderAtoms();
cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearBuffer(cl.getForceBuffers());
cl.getNonbondedUtilities().prepareInteractions();
}
void OpenCLCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& context) {
cl.getNonbondedUtilities().computeInteractions();
cl.reduceBuffer(cl.getForceBuffers(), cl.getNumForceBuffers());
}
void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context) {
if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0)
cl.reorderAtoms();
cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearBuffer(cl.getEnergyBuffer());
cl.getNonbondedUtilities().prepareInteractions();
}
double OpenCLCalcForcesAndEnergyKernel::finishEnergyComputation(ContextImpl& context) {
cl.getNonbondedUtilities().computeInteractions();
OpenCLArray& energy = cl.getEnergyBuffer();
energy.download();
double sum = 0.0f;
for (int i = 0; i < energy.getSize(); i++)
sum += energy[i];
return sum;
}
void OpenCLUpdateStateDataKernel::initialize(const System& system) {
}
double OpenCLUpdateStateDataKernel::getTime(const ContextImpl& context) const {
return cl.getTime();
}
void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
cl.setTime(time);
}
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector& positions) {
OpenCLArray& posq = cl.getPosq();
posq.download();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
mm_float4 periodicBoxSize = cl.getNonbondedUtilities().getPeriodicBoxSize();
for (int i = 0; i < numParticles; ++i) {
mm_float4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
}
}
void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector& positions) {
OpenCLArray& posq = cl.getPosq();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) {
mm_float4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = (cl_float) p[0];
pos.y = (cl_float) p[1];
pos.z = (cl_float) p[2];
}
posq.upload();
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
}
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector& velocities) {
OpenCLArray& velm = cl.getVelm();
velm.download();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
mm_float4 vel = velm[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector& velocities) {
OpenCLArray& velm = cl.getVelm();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) {
mm_float4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = (cl_float) p[0];
vel.y = (cl_float) p[1];
vel.z = (cl_float) p[2];
}
velm.upload();
}
void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, std::vector& forces) {
OpenCLArray& force = cl.getForce();
force.download();
OpenCLArray& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
forces.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
mm_float4 f = force[i];
forces[order[i]] = Vec3(f.x, f.y, f.z);
}
}
class OpenCLBondForceInfo : public OpenCLForceInfo {
public:
OpenCLBondForceInfo(int requiredBuffers, const HarmonicBondForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
double length, k;
force.getBondParameters(index, particle1, particle2, length, k);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double length1, length2, k1, k2;
force.getBondParameters(group1, particle1, particle2, length1, k1);
force.getBondParameters(group2, particle1, particle2, length2, k2);
return (length1 == length2 && k1 == k2);
}
private:
const HarmonicBondForce& force;
};
OpenCLCalcHarmonicBondForceKernel::~OpenCLCalcHarmonicBondForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
}
void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
params = new OpenCLArray(cl, numBonds, "bondParams");
indices = new OpenCLArray(cl, numBonds, "bondIndices");
vector forceBufferCounter(system.getNumParticles(), 0);
vector paramVector(numBonds);
vector indicesVector(numBonds);
for (int i = 0; i < numBonds; i++) {
int particle1, particle2;
double length, k;
force.getBondParameters(i, particle1, particle2, length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
indicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++);
}
params->upload(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLBondForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::harmonicBondForce);
kernel = cl::Kernel(program, "calcHarmonicBondForce");
}
void OpenCLCalcHarmonicBondForceKernel::executeForces(ContextImpl& context) {
if (numBonds == 0)
return;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numBonds);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, params->getDeviceBuffer());
kernel.setArg(6, indices->getDeviceBuffer());
}
cl.executeKernel(kernel, numBonds);
}
double OpenCLCalcHarmonicBondForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLCustomBondForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomBondForceInfo(int requiredBuffers, const CustomBondForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
vector parameters;
force.getBondParameters(index, particle1, particle2, parameters);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
vector parameters1, parameters2;
force.getBondParameters(group1, particle1, particle2, parameters1);
force.getBondParameters(group2, particle1, particle2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomBondForce& force;
};
OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customBondParams");
indices = new OpenCLArray(cl, numBonds, "customBondIndices");
string extraArguments;
if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray(cl, force.getNumGlobalParameters(), "customBondGlobals", false, CL_MEM_READ_ONLY);
extraArguments += ", __constant float* globals";
}
vector forceBufferCounter(system.getNumParticles(), 0);
vector > paramVector(numBonds);
vector indicesVector(numBonds);
for (int i = 0; i < numBonds; i++) {
int particle1, particle2;
vector parameters;
force.getBondParameters(i, particle1, particle2, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
indicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++);
}
params->setParameterValues(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLCustomBondForceInfo(maxBuffers, force));
// Record information for the expressions.
vector paramNames;
for (int i = 0; i < force.getNumPerBondParameters(); i++)
paramNames.push_back(force.getPerBondParameterName(i));
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdR = "] = forceExpression;
// Create the kernels.
map variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
variables[name] = value;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArguments += ", __global "+buffer.getType()+"* "+buffer.getName();
compute< > functions;
compute << OpenCLExpressionUtilities::createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
replacements["EXTRA_ARGUMENTS"] = extraArguments;
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customBondForce, replacements));
kernel = cl::Kernel(program, "computeCustomBondForces");
}
void OpenCLCalcCustomBondForceKernel::executeForces(ContextImpl& context) {
if (numBonds == 0)
return;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numBonds);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, indices->getDeviceBuffer());
int nextIndex = 6;
if (globals != NULL)
kernel.setArg(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg(nextIndex++, buffer.getBuffer());
}
}
cl.executeKernel(kernel, numBonds);
}
double OpenCLCalcCustomBondForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLAngleForceInfo : public OpenCLForceInfo {
public:
OpenCLAngleForceInfo(int requiredBuffers, const HarmonicAngleForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(index, particle1, particle2, particle3, angle, k);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
double angle1, angle2, k1, k2;
force.getAngleParameters(group1, particle1, particle2, particle3, angle1, k1);
force.getAngleParameters(group2, particle1, particle2, particle3, angle2, k2);
return (angle1 == angle2 && k1 == k2);
}
private:
const HarmonicAngleForce& force;
};
OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
}
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
numAngles = force.getNumAngles();
if (numAngles == 0)
return;
params = new OpenCLArray(cl, numAngles, "angleParams");
indices = new OpenCLArray(cl, numAngles, "angleIndices");
vector forceBufferCounter(system.getNumParticles(), 0);
vector paramVector(numAngles);
vector indicesVector(numAngles);
for (int i = 0; i < numAngles; i++) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(i, particle1, particle2, particle3, angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
indicesVector[i] = mm_int8(particle1, particle2, particle3,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, 0, 0);
}
params->upload(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLAngleForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::harmonicAngleForce);
kernel = cl::Kernel(program, "calcHarmonicAngleForce");
}
void OpenCLCalcHarmonicAngleForceKernel::executeForces(ContextImpl& context) {
if (numAngles == 0)
return;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numAngles);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, params->getDeviceBuffer());
kernel.setArg(6, indices->getDeviceBuffer());
}
cl.executeKernel(kernel, numAngles);
}
double OpenCLCalcHarmonicAngleForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLPeriodicTorsionForceInfo : public OpenCLForceInfo {
public:
OpenCLPeriodicTorsionForceInfo(int requiredBuffers, const PeriodicTorsionForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, periodicity, phase, k);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4, periodicity1, periodicity2;
double phase1, phase2, k1, k2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, periodicity1, phase1, k1);
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, periodicity2, phase2, k2);
return (periodicity1 == periodicity2 && phase1 == phase2 && k1 == k2);
}
private:
const PeriodicTorsionForce& force;
};
OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
}
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
numTorsions = force.getNumTorsions();
if (numTorsions == 0)
return;
params = new OpenCLArray(cl, numTorsions, "periodicTorsionParams");
indices = new OpenCLArray(cl, numTorsions, "periodicTorsionIndices");
vector forceBufferCounter(system.getNumParticles(), 0);
vector paramVector(numTorsions);
vector indicesVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
}
params->upload(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLPeriodicTorsionForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::periodicTorsionForce);
kernel = cl::Kernel(program, "calcPeriodicTorsionForce");
}
void OpenCLCalcPeriodicTorsionForceKernel::executeForces(ContextImpl& context) {
if (numTorsions == 0)
return;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numTorsions);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, params->getDeviceBuffer());
kernel.setArg(6, indices->getDeviceBuffer());
}
cl.executeKernel(kernel, numTorsions);
}
double OpenCLCalcPeriodicTorsionForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLRBTorsionForceInfo : public OpenCLForceInfo {
public:
OpenCLRBTorsionForceInfo(int requiredBuffers, const RBTorsionForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
double c0a, c0b, c1a, c1b, c2a, c2b, c3a, c3b, c4a, c4b, c5a, c5b;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, c0a, c1a, c2a, c3a, c4a, c5a);
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, c0b, c1b, c2b, c3b, c4b, c5b);
return (c0a == c0b && c1a == c1b && c2a == c2b && c3a == c3b && c4a == c4b && c5a == c5b);
}
private:
const RBTorsionForce& force;
};
OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
if (params != NULL)
delete params;
if (indices != NULL)
delete indices;
}
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
numTorsions = force.getNumTorsions();
if (numTorsions == 0)
return;
params = new OpenCLArray(cl, numTorsions, "rbTorsionParams");
indices = new OpenCLArray(cl, numTorsions, "rbTorsionIndices");
vector forceBufferCounter(system.getNumParticles(), 0);
vector paramVector(numTorsions);
vector indicesVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(i, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
indicesVector[i] = mm_int8(particle1, particle2, particle3, particle4,
forceBufferCounter[particle1]++, forceBufferCounter[particle2]++, forceBufferCounter[particle3]++, forceBufferCounter[particle4]++);
}
params->upload(paramVector);
indices->upload(indicesVector);
int maxBuffers = 1;
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
cl.addForce(new OpenCLRBTorsionForceInfo(maxBuffers, force));
cl::Program program = cl.createProgram(OpenCLKernelSources::rbTorsionForce);
kernel = cl::Kernel(program, "calcRBTorsionForce");
}
void OpenCLCalcRBTorsionForceKernel::executeForces(ContextImpl& context) {
if (numTorsions == 0)
return;
if (!hasInitializedKernel) {
hasInitializedKernel = true;
kernel.setArg(0, cl.getPaddedNumAtoms());
kernel.setArg(1, numTorsions);
kernel.setArg(2, cl.getForceBuffers().getDeviceBuffer());
kernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
kernel.setArg(4, cl.getPosq().getDeviceBuffer());
kernel.setArg(5, params->getDeviceBuffer());
kernel.setArg(6, indices->getDeviceBuffer());
}
cl.executeKernel(kernel, numTorsions);
}
double OpenCLCalcRBTorsionForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLNonbondedForceInfo : public OpenCLForceInfo {
public:
OpenCLNonbondedForceInfo(int requiredBuffers, const NonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
int getNumParticleGroups() {
return force.getNumExceptions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
private:
const NonbondedForce& force;
};
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (exceptionIndices != NULL)
delete exceptionIndices;
}
void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
// Identify which exceptions are 1-4 interactions.
vector > exclusions;
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0)
exceptions.push_back(i);
}
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
sigmaEpsilon = new OpenCLArray(cl, numParticles, "sigmaEpsilon");
OpenCLArray& posq = cl.getPosq();
vector sigmaEpsilonVector(numParticles);
vector > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
bool hasCoulomb = false;
bool hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
posq[i].w = (float) charge;
sigmaEpsilonVector[i] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (int i = 0; i < (int) exclusions.size(); i++) {
exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first);
}
posq.upload();
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
Vec3 boxVectors[3];
system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
map defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
if (useCutoff) {
// Compute the reaction field constants.
double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC);
}
if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// Compute the Ewald parameters.
double alpha;
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
double selfEnergyScale = ONE_4PI_EPS0*alpha/std::sqrt(M_PI);
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI);
// Create the reciprocal space kernels.
map replacements;
replacements["NUM_ATOMS"] = intToString(numParticles);
replacements["KMAX_X"] = intToString(kmaxx);
replacements["KMAX_Y"] = intToString(kmaxy);
replacements["KMAX_Z"] = intToString(kmaxz);
replacements["RECIPROCAL_BOX_SIZE_X"] = doubleToString(2.0*M_PI/boxVectors[0][0]);
replacements["RECIPROCAL_BOX_SIZE_Y"] = doubleToString(2.0*M_PI/boxVectors[1][1]);
replacements["RECIPROCAL_BOX_SIZE_Z"] = doubleToString(2.0*M_PI/boxVectors[2][2]);
replacements["RECIPROCAL_COEFFICIENT"] = doubleToString(ONE_4PI_EPS0*4*M_PI/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]));
replacements["EXP_COEFFICIENT"] = doubleToString(-1.0/(4.0*alpha*alpha));
cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), "cosSinSums");
}
else if (force.getNonbondedMethod() == NonbondedForce::PME)
throw OpenMMException("OpenMMPlatform does not yet support PME");
else
ewaldSelfEnergy = 0.0;
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source);
if (hasLJ)
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float2", sizeof(cl_float2), sigmaEpsilon->getDeviceBuffer()));
cutoffSquared = force.getCutoffDistance()*force.getCutoffDistance();
// Initialize the exceptions.
int numExceptions = exceptions.size();
int maxBuffers = cl.getNonbondedUtilities().getNumForceBuffers();
if (numExceptions > 0) {
exceptionParams = new OpenCLArray(cl, numExceptions, "exceptionParams");
exceptionIndices = new OpenCLArray(cl, numExceptions, "exceptionIndices");
vector exceptionParamsVector(numExceptions);
vector exceptionIndicesVector(numExceptions);
vector forceBufferCounter(system.getNumParticles(), 0);
for (int i = 0; i < numExceptions; i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[i], particle1, particle2, chargeProd, sigma, epsilon);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionIndicesVector[i] = mm_int4(particle1, particle2, forceBufferCounter[particle1]++, forceBufferCounter[particle2]++);
}
exceptionParams->upload(exceptionParamsVector);
exceptionIndices->upload(exceptionIndicesVector);
for (int i = 0; i < (int) forceBufferCounter.size(); i++)
maxBuffers = max(maxBuffers, forceBufferCounter[i]);
}
cl.addForce(new OpenCLNonbondedForceInfo(maxBuffers, force));
if (useCutoff) {
defines["USE_CUTOFF"] = "1";
}
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
cl::Program program = cl.createProgram(OpenCLKernelSources::nonbondedExceptions, defines);
exceptionsKernel = cl::Kernel(program, "computeNonbondedExceptions");
}
void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
if (exceptionIndices != NULL) {
int numExceptions = exceptionIndices->getSize();
exceptionsKernel.setArg(0, cl.getPaddedNumAtoms());
exceptionsKernel.setArg(1, numExceptions);
exceptionsKernel.setArg(2, (cl_float) cutoffSquared);
exceptionsKernel.setArg(3, cl.getNonbondedUtilities().getPeriodicBoxSize());
exceptionsKernel.setArg(4, cl.getForceBuffers().getDeviceBuffer());
exceptionsKernel.setArg(5, cl.getEnergyBuffer().getDeviceBuffer());
exceptionsKernel.setArg(6, cl.getPosq().getDeviceBuffer());
exceptionsKernel.setArg(7, exceptionParams->getDeviceBuffer());
exceptionsKernel.setArg(8, exceptionIndices->getDeviceBuffer());
}
if (cosSinSums != NULL) {
ewaldSumsKernel.setArg(0, cl.getEnergyBuffer().getDeviceBuffer());
ewaldSumsKernel.setArg(1, cl.getPosq().getDeviceBuffer());
ewaldSumsKernel.setArg(2, cosSinSums->getDeviceBuffer());
ewaldForcesKernel.setArg(0, cl.getForceBuffers().getDeviceBuffer());
ewaldForcesKernel.setArg(1, cl.getPosq().getDeviceBuffer());
ewaldForcesKernel.setArg(2, cosSinSums->getDeviceBuffer());
}
}
if (exceptionIndices != NULL)
cl.executeKernel(exceptionsKernel, exceptionIndices->getSize());
if (cosSinSums != NULL) {
cl.executeKernel(ewaldSumsKernel, cosSinSums->getSize());
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
}
}
double OpenCLCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return ewaldSelfEnergy;
}
class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector params1;
vector params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomNonbondedForce& force;
};
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+intToString(forceIndex)+"_";
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
globals = new OpenCLArray(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", false, CL_MEM_READ_ONLY);
vector > paramVector(numParticles);
vector > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
vector parameters;
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
exclusionList[i].push_back(i);
}
for (int i = 0; i < force.getNumExclusions(); i++) {
int particle1, particle2;
force.getExclusionParticles(i, particle1, particle2);
exclusionList[particle1].push_back(particle2);
exclusionList[particle2].push_back(particle1);
}
params->setParameterValues(paramVector);
// Record the tabulated functions.
OpenCLExpressionUtilities::FunctionPlaceholder fp;
map functions;
vector > functionDefinitions;
vector tabulatedFunctionParamsVec(force.getNumFunctions());
for (int i = 0; i < force.getNumFunctions(); i++) {
string name;
vector values;
double min, max;
bool interpolating;
force.getFunctionParameters(i, name, values, min, max, interpolating);
string arrayName = prefix+"table"+intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), 0.0f);
vector f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, interpolating);
tabulatedFunctions.push_back(new OpenCLArray(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float4", sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float4", sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer()));
}
// Record information for the expressions.
vector paramNames;
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
paramNames.push_back(force.getPerParticleParameterName(i));
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map forceExpressions;
forceExpressions["tempEnergy += "] = energyExpression;
forceExpressions["tempForce -= "] = forceExpression;
// Create the kernels.
map variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables[name+"1"] = prefix+"params"+params->getParameterSuffix(i, "1");
variables[name+"2"] = prefix+"params"+params->getParameterSuffix(i, "2");
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
variables[name] = prefix+value;
}
stringstream compute;
compute << OpenCLExpressionUtilities::createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
string source = cl.replaceStrings(OpenCLKernelSources::customNonbonded, replacements);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source);
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+intToString(i+1), buffer.getType(), buffer.getSize(), buffer.getBuffer()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", sizeof(cl_float), globals->getDeviceBuffer()));
}
cl.addForce(new OpenCLCustomNonbondedForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
}
void OpenCLCalcCustomNonbondedForceKernel::executeForces(ContextImpl& context) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
}
double OpenCLCalcCustomNonbondedForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLGBSAOBCForceInfo : public OpenCLForceInfo {
public:
OpenCLGBSAOBCForceInfo(int requiredBuffers, const GBSAOBCForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, radius1, radius2, scale1, scale2;
force.getParticleParameters(particle1, charge1, radius1, scale1);
force.getParticleParameters(particle2, charge2, radius2, scale2);
return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
}
private:
const GBSAOBCForce& force;
};
OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() {
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (obcChain != NULL)
delete obcChain;
}
void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
params = new OpenCLArray(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
bornRadii = new OpenCLArray(cl, cl.getPaddedNumAtoms(), "bornRadii");
obcChain = new OpenCLArray(cl, cl.getPaddedNumAtoms(), "obcChain");
bornSum = new OpenCLArray(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
bornForce = new OpenCLArray(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
OpenCLArray& posq = cl.getPosq();
int numParticles = force.getNumParticles();
vector paramsVector(numParticles);
const double dielectricOffset = 0.009;
for (int i = 0; i < numParticles; i++) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
radius -= dielectricOffset;
paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
posq[i].w = (float) charge;
}
posq.upload();
params->upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
string source = OpenCLKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector >(), source);
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float2", sizeof(cl_float2), params->getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "float", sizeof(cl_float), bornForce->getDeviceBuffer()));;
cl.addForce(new OpenCLGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
}
void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
if (!hasCreatedKernels) {
// These Kernels cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then.
hasCreatedKernels = true;
map defines;
if (nb.getForceBufferPerAtomBlock())
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (nb.getUseCutoff())
defines["USE_CUTOFF"] = "1";
if (nb.getUsePeriodic())
defines["USE_PERIODIC"] = "1";
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(nb.getPeriodicBoxSize().x);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(nb.getPeriodicBoxSize().y);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(nb.getPeriodicBoxSize().z);
defines["CUTOFF_SQUARED"] = doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["PREFACTOR"] = doubleToString(prefactor);
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
string file = (cl.getSIMDWidth() == 32 ? OpenCLKernelSources::gbsaObc_nvidia : OpenCLKernelSources::gbsaObc_default);
cl::Program program = cl.createProgram(file, defines);
computeBornSumKernel = cl::Kernel(program, "computeBornSum");
computeBornSumKernel.setArg(0, bornSum->getDeviceBuffer());
computeBornSumKernel.setArg(1, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
computeBornSumKernel.setArg(2, cl.getPosq().getDeviceBuffer());
computeBornSumKernel.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
computeBornSumKernel.setArg(4, params->getDeviceBuffer());
computeBornSumKernel.setArg(5, OpenCLContext::ThreadBlockSize*sizeof(cl_float2), NULL);
computeBornSumKernel.setArg(6, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
if (nb.getUseCutoff()) {
computeBornSumKernel.setArg(7, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg(8, nb.getInteractionFlags().getDeviceBuffer());
computeBornSumKernel.setArg(9, nb.getInteractionCount().getDeviceBuffer());
}
else {
computeBornSumKernel.setArg(7, nb.getTiles().getDeviceBuffer());
computeBornSumKernel.setArg(8, nb.getTiles().getSize());
}
force1Kernel = cl::Kernel(program, "computeGBSAForce1");
force1Kernel.setArg(0, cl.getForceBuffers().getDeviceBuffer());
force1Kernel.setArg(1, cl.getEnergyBuffer().getDeviceBuffer());
force1Kernel.setArg(2, cl.getPosq().getDeviceBuffer());
force1Kernel.setArg(3, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
force1Kernel.setArg(4, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
force1Kernel.setArg(5, bornRadii->getDeviceBuffer());
force1Kernel.setArg(6, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
force1Kernel.setArg(7, bornForce->getDeviceBuffer());
force1Kernel.setArg(8, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
force1Kernel.setArg(9, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
if (nb.getUseCutoff()) {
force1Kernel.setArg(10, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg(11, nb.getInteractionFlags().getDeviceBuffer());
force1Kernel.setArg(12, nb.getInteractionCount().getDeviceBuffer());
}
else {
force1Kernel.setArg(10, nb.getTiles().getDeviceBuffer());
force1Kernel.setArg(11, nb.getTiles().getSize());
}
program = cl.createProgram(OpenCLKernelSources::gbsaObcReductions, defines);
reduceBornSumKernel = cl::Kernel(program, "reduceBornSum");
reduceBornSumKernel.setArg(0, cl.getPaddedNumAtoms());
reduceBornSumKernel.setArg(1, nb.getNumForceBuffers());
reduceBornSumKernel.setArg(2, 1.0f);
reduceBornSumKernel.setArg(3, 0.8f);
reduceBornSumKernel.setArg(4, 4.85f);
reduceBornSumKernel.setArg(5, bornSum->getDeviceBuffer());
reduceBornSumKernel.setArg(6, params->getDeviceBuffer());
reduceBornSumKernel.setArg(7, bornRadii->getDeviceBuffer());
reduceBornSumKernel.setArg(8, obcChain->getDeviceBuffer());
reduceBornForceKernel = cl::Kernel(program, "reduceBornForce");
reduceBornForceKernel.setArg(0, cl.getPaddedNumAtoms());
reduceBornForceKernel.setArg(1, nb.getNumForceBuffers());
reduceBornForceKernel.setArg(2, bornForce->getDeviceBuffer());
reduceBornForceKernel.setArg(3, cl.getEnergyBuffer().getDeviceBuffer());
reduceBornForceKernel.setArg(4, params->getDeviceBuffer());
reduceBornForceKernel.setArg(5, bornRadii->getDeviceBuffer());
reduceBornForceKernel.setArg(6, obcChain->getDeviceBuffer());
}
cl.clearBuffer(*bornSum);
cl.clearBuffer(*bornForce);
cl.executeKernel(computeBornSumKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(reduceBornSumKernel, cl.getPaddedNumAtoms());
cl.executeKernel(force1Kernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(reduceBornForceKernel, cl.getPaddedNumAtoms());
}
double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return 0.0;
}
class OpenCLCustomGBForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomGBForceInfo(int requiredBuffers, const CustomGBForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector params1;
vector params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, std::vector& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomGBForce& force;
};
OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
if (params != NULL)
delete params;
if (computedValues != NULL)
delete computedValues;
if (energyDerivs != NULL)
delete energyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
bool useExclusionsForValue = false;
vector computedValueNames(force.getNumComputedValues());
vector computedValueExpressions(force.getNumComputedValues());
if (force.getNumComputedValues() > 0) {
CustomGBForce::ComputationType type;
force.getComputedValueParameters(0, computedValueNames[0], computedValueExpressions[0], type);
if (type == CustomGBForce::SingleParticle)
throw OpenMMException("OpenCLPlatform requires that the first computed value for a CustomGBForce be of type ParticlePair or ParticlePairNoExclusions.");
useExclusionsForValue = (type == CustomGBForce::ParticlePair);
for (int i = 1; i < force.getNumComputedValues(); i++) {
force.getComputedValueParameters(i, computedValueNames[i], computedValueExpressions[i], type);
if (type != CustomGBForce::SingleParticle)
throw OpenMMException("OpenCLPlatform requires that a CustomGBForce only have one computed value of type ParticlePair or ParticlePairNoExclusions.");
}
}
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+intToString(forceIndex)+"_";
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customGBParameters");
computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), numParticles, "customGBComputedValues");
if (force.getNumGlobalParameters() > 0)
globals = new OpenCLArray(cl, force.getNumGlobalParameters(), "customGBGlobals", false, CL_MEM_READ_ONLY);
vector > paramVector(numParticles);
vector > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
vector parameters;
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
exclusionList[i].push_back(i);
}
for (int i = 0; i < force.getNumExclusions(); i++) {
int particle1, particle2;
force.getExclusionParticles(i, particle1, particle2);
exclusionList[particle1].push_back(particle2);
exclusionList[particle2].push_back(particle1);
}
params->setParameterValues(paramVector);
// Record the tabulated functions.
OpenCLExpressionUtilities::FunctionPlaceholder fp;
map functions;
vector > functionDefinitions;
vector tabulatedFunctionParamsVec(force.getNumFunctions());
stringstream tableArgs;
for (int i = 0; i < force.getNumFunctions(); i++) {
string name;
vector values;
double min, max;
bool interpolating;
force.getFunctionParameters(i, name, values, min, max, interpolating);
string arrayName = prefix+"table"+intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), 0.0f);
vector f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, interpolating);
tabulatedFunctions.push_back(new OpenCLArray(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float4", sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
tableArgs << ", __global float4* " << arrayName;
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float4", sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer()));
tableArgs << ", __constant float4* " << prefix << "functionParams";
}
// Record the global parameters.
vector paramNames;
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
paramNames.push_back(force.getPerParticleParameterName(i));
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms.
vector valueDerivExpressions;
vector > energyDerivExpressions;
for (int i = 0; i < force.getNumComputedValues(); i++) {
Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
if (i == 0)
valueDerivExpressions.push_back(ex.differentiate("r").optimize());
else
valueDerivExpressions.push_back(ex.differentiate(computedValueNames[i-1]).optimize());
}
energyDerivExpressions.resize(force.getNumEnergyTerms());
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
Lepton::ParsedExpression ex = Lepton::Parser::parse(expression, functions).optimize();
for (int j = 0; j < force.getNumComputedValues(); j++) {
if (type == CustomGBForce::SingleParticle)
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize());
else {
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"1").optimize());
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"2").optimize());
}
}
}
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), "customGBEnergyDerivatives");
// Create the kernels.
bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic);
{
// Create the N2 value kernel.
map variables;
map rename;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables[name+"1"] = "params"+params->getParameterSuffix(i, "1");
variables[name+"2"] = "params"+params->getParameterSuffix(i, "2");
rename[name+"1"] = name+"2";
rename[name+"2"] = name+"1";
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
variables[name] = value;
}
map n2ValueExpressions;
stringstream n2ValueSource;
Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[0], functions).optimize();
n2ValueExpressions["tempValue1 = "] = ex;
n2ValueExpressions["tempValue2 = "] = ex.renameVariables(rename);
n2ValueSource << OpenCLExpressionUtilities::createExpressions(n2ValueExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
map replacements;
replacements["COMPUTE_VALUE"] = n2ValueSource.str();
stringstream extraArgs, loadLocal1, loadLocal2, load1, load2;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << paramName << ", __local " << buffer.getType() << "* local_" << paramName;
loadLocal1 << "local_" << paramName << "[get_local_id(0)] = " << paramName << "1;\n";
loadLocal2 << "local_" << paramName << "[get_local_id(0)] = global_" << paramName << "[j];\n";
load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
load2 << buffer.getType() << " " << paramName << "2 = local_" << paramName << "[atom2];\n";
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
map defines;
if (cl.getNonbondedUtilities().getForceBufferPerAtomBlock())
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (useCutoff)
defines["USE_CUTOFF"] = "1";
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
if (useExclusionsForValue)
defines["USE_EXCLUSIONS"] = "1";
Vec3 boxVectors[3];
system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
string file = (cl.getSIMDWidth() == 32 ? OpenCLKernelSources::customGBValueN2_nvidia : OpenCLKernelSources::customGBValueN2_default);
cl::Program program = cl.createProgram(cl.replaceStrings(file, replacements), defines);
pairValueKernel = cl::Kernel(program, "computeN2Value");
}
{
// Create the kernel to reduce the N2 value and calculate other values.
stringstream reductionSource, extraArgs;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << valueName;
reductionSource << buffer.getType() << " local_" << valueName << ";\n";
}
reductionSource << "local_values" << computedValues->getParameterSuffix(0) << " = sum;\n";
map variables;
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
for (int i = 1; i < force.getNumComputedValues(); i++) {
variables[computedValueNames[i-1]] = "local_values"+computedValues->getParameterSuffix(i-1);
map valueExpressions;
valueExpressions["local_values"+computedValues->getParameterSuffix(i)+" = "] = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
reductionSource << OpenCLExpressionUtilities::createExpressions(valueExpressions, variables, functionDefinitions, "value"+intToString(i)+"_temp", "functionParams");
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
string valueName = "values"+intToString(i+1);
reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n";
}
map replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_VALUES"] = reductionSource.str();
map defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customGBValuePerParticle, replacements), defines);
perParticleValueKernel = cl::Kernel(program, "computePerParticleValues");
}
{
// Create the N2 energy kernel.
map variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables[name+"1"] = "params"+params->getParameterSuffix(i, "1");
variables[name+"2"] = "params"+params->getParameterSuffix(i, "2");
}
for (int i = 0; i < force.getNumComputedValues(); i++) {
variables[computedValueNames[i]+"1"] = "values"+computedValues->getParameterSuffix(i, "1");
variables[computedValueNames[i]+"2"] = "values"+computedValues->getParameterSuffix(i, "2");
}
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
map n2EnergyExpressions;
stringstream n2EnergySource;
bool anyExclusions = false;
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
if (type == CustomGBForce::SingleParticle)
continue;
bool exclude = (type == CustomGBForce::ParticlePair);
anyExclusions |= exclude;
n2EnergyExpressions["tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize();
n2EnergyExpressions["dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize();
for (int j = 0; j < force.getNumComputedValues(); j++) {
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_1")+" += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_2")+" += "] = energyDerivExpressions[i][2*j+1];
}
if (exclude)
n2EnergySource << "if (!isExcluded) {\n";
n2EnergySource << OpenCLExpressionUtilities::createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
if (exclude)
n2EnergySource << "}\n";
}
map replacements;
replacements["COMPUTE_INTERACTION"] = n2EnergySource.str();
stringstream extraArgs, loadLocal1, loadLocal2, clearLocal, load1, load2, recordDeriv, storeDerivs1, storeDerivs2, declareTemps, setTemps;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << paramName << ", __local " << buffer.getType() << "* local_" << paramName;
loadLocal1 << "local_" << paramName << "[get_local_id(0)] = " << paramName << "1;\n";
loadLocal2 << "local_" << paramName << "[get_local_id(0)] = global_" << paramName << "[j];\n";
load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
load2 << buffer.getType() << " " << paramName << "2 = local_" << paramName << "[atom2];\n";
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << valueName << ", __local " << buffer.getType() << "* local_" << valueName;
loadLocal1 << "local_" << valueName << "[get_local_id(0)] = " << valueName << "1;\n";
loadLocal2 << "local_" << valueName << "[get_local_id(0)] = global_" << valueName << "[j];\n";
load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n";
load2 << buffer.getType() << " " << valueName << "2 = local_" << valueName << "[atom2];\n";
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index << ", __local " << buffer.getType() << "* local_deriv" << index;
clearLocal << "local_deriv" << index << "[get_local_id(0)] = 0.0f;\n";
load1 << buffer.getType() << " deriv" << index << "_1 = 0.0f;\n";
load2 << buffer.getType() << " deriv" << index << "_2 = 0.0f;\n";
recordDeriv << "local_deriv" << index << "[atom2] += deriv" << index << "_2;\n";
storeDerivs1 << "STORE_DERIVATIVE_1(" << index << ")";
storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")";
declareTemps << "__local " << buffer.getType() << " tempDerivBuffer" << index << "[64];\n";
setTemps << "tempDerivBuffer" << index << "[get_local_id(0)] = deriv" << index << "_1;\n";
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["CLEAR_LOCAL_DERIVATIVES"] = clearLocal.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
replacements["RECORD_DERIVATIVE_2"] = recordDeriv.str();
replacements["STORE_DERIVATIVES_1"] = storeDerivs1.str();
replacements["STORE_DERIVATIVES_2"] = storeDerivs2.str();
replacements["DECLARE_TEMP_BUFFERS"] = declareTemps.str();
replacements["SET_TEMP_BUFFERS"] = setTemps.str();
map defines;
if (cl.getNonbondedUtilities().getForceBufferPerAtomBlock())
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (useCutoff)
defines["USE_CUTOFF"] = "1";
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
if (anyExclusions)
defines["USE_EXCLUSIONS"] = "1";
Vec3 boxVectors[3];
system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
string file = (cl.getSIMDWidth() == 32 ? OpenCLKernelSources::customGBEnergyN2_nvidia : OpenCLKernelSources::customGBEnergyN2_default);
cl::Program program = cl.createProgram(cl.replaceStrings(file, replacements), defines);
pairEnergyKernel = cl::Kernel(program, "computeN2Energy");
}
{
// Create the kernel to reduce the derivatives and calculate per-particle energy terms.
stringstream compute, extraArgs, reduce;
map energyExpressions;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << valueName;
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index;
reduce << "REDUCE_VALUE(derivBuffers" << index << ", " << buffer.getType() << ")\n";
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
}
map variables;
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
if (type != CustomGBForce::SingleParticle)
continue;
energyExpressions["/*"+intToString(i+1)+"*/ energy += "] = Lepton::Parser::parse(expression, functions).optimize();
for (int j = 0; j < force.getNumComputedValues(); j++)
energyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j)+" += "] = energyDerivExpressions[i][j];
}
compute << OpenCLExpressionUtilities::createExpressions(energyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
string index = intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
}
map replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["REDUCE_DERIVATIVES"] = reduce.str();
replacements["COMPUTE_ENERGY"] = compute.str();
map defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customGBEnergyPerParticle, replacements), defines);
perParticleEnergyKernel = cl::Kernel(program, "computePerParticleEnergy");
}
{
// Create the code to calculate chain rules terms (as part of the default nonbonded kernel).
map globalVariables;
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+intToString(i)+"]";
globalVariables[name] = prefix+value;
}
map variables = globalVariables;
map rename;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables[name+"1"] = prefix+"params"+params->getParameterSuffix(i, "1");
variables[name+"2"] = prefix+"params"+params->getParameterSuffix(i, "2");
rename[name+"1"] = name+"2";
rename[name+"2"] = name+"1";
}
map derivExpressions;
stringstream chainSource;
Lepton::ParsedExpression dVdR = Lepton::Parser::parse(computedValueExpressions[0], functions).differentiate("r").optimize();
derivExpressions["float dVdR1 = "] = dVdR;
derivExpressions["float dVdR2 = "] = dVdR.renameVariables(rename);
chainSource << OpenCLExpressionUtilities::createExpressions(derivExpressions, variables, functionDefinitions, prefix+"temp0_", prefix+"functionParams");
chainSource << "tempForce -= dVdR1*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(0, "1") << ";\n";
chainSource << "tempForce -= dVdR2*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(0, "2") << ";\n";
variables = globalVariables;
map rename1;
map rename2;
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables[name+"1"] = prefix+"params"+params->getParameterSuffix(i, "1");
variables[name+"2"] = prefix+"params"+params->getParameterSuffix(i, "2");
rename1[name] = name+"1";
rename2[name] = name+"2";
}
for (int i = 0; i < force.getNumComputedValues(); i++) {
const string& name = computedValueNames[i];
variables[name+"1"] = prefix+"values"+computedValues->getParameterSuffix(i, "1");
variables[name+"2"] = prefix+"values"+computedValues->getParameterSuffix(i, "2");
rename1[name] = name+"1";
rename2[name] = name+"2";
if (i == 0)
continue;
Lepton::ParsedExpression dVdV = Lepton::Parser::parse(computedValueExpressions[1], functions).differentiate(computedValueNames[i-1]).optimize();
string var = "dV"+intToString(i+1)+"dV"+intToString(i)+"_";
derivExpressions.clear();
derivExpressions["float "+var+"1 = "] = dVdV.renameVariables(rename1);
derivExpressions["float "+var+"2 = "] = dVdV.renameVariables(rename2);
chainSource << OpenCLExpressionUtilities::createExpressions(derivExpressions, variables, functionDefinitions, prefix+"temp"+intToString(i)+"_", prefix+"functionParams");
chainSource << "dVdR1 *= "+var+"1;\n";
chainSource << "dVdR2 *= "+var+"2;\n";
chainSource << "tempForce -= dVdR1*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(i, "1") << ";\n";
chainSource << "tempForce -= dVdR2*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(i, "2") << ";\n";
}
map replacements;
replacements["COMPUTE_FORCE"] = chainSource.str();
string source = cl.replaceStrings(OpenCLKernelSources::customGBChainRule, replacements);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source);
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = prefix+"params"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer()));
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string paramName = prefix+"values"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer()));
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string paramName = prefix+"dEdV"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", sizeof(cl_float), globals->getDeviceBuffer()));
}
}
cl.addForce(new OpenCLCustomGBForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
}
void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
valueBuffers = new OpenCLArray(cl, cl.getPaddedNumAtoms()*cl.getNumForceBuffers(), "customGBValueBuffers");
int index = 0;
pairValueKernel.setArg(index++, cl.getPosq().getDeviceBuffer());
pairValueKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
pairValueKernel.setArg(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer());
pairValueKernel.setArg(index++, cl.getNonbondedUtilities().getExclusionIndices().getDeviceBuffer());
pairValueKernel.setArg(index++, valueBuffers->getDeviceBuffer());
pairValueKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
pairValueKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
if (nb.getUseCutoff()) {
pairValueKernel.setArg(index++, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg(index++, nb.getInteractionFlags().getDeviceBuffer());
pairValueKernel.setArg(index++, nb.getInteractionCount().getDeviceBuffer());
}
else {
pairValueKernel.setArg(index++, nb.getTiles().getDeviceBuffer());
pairValueKernel.setArg(index++, nb.getTiles().getSize());
}
if (globals != NULL)
pairValueKernel.setArg(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
pairValueKernel.setArg(index++, buffer.getBuffer());
pairValueKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairValueKernel.setArg(index++, tabulatedFunctions[i]->getDeviceBuffer());
pairValueKernel.setArg(index++, tabulatedFunctionParams->getDeviceBuffer());
}
index = 0;
perParticleValueKernel.setArg(index++, cl.getPaddedNumAtoms());
perParticleValueKernel.setArg(index++, nb.getNumForceBuffers());
perParticleValueKernel.setArg(index++, valueBuffers->getDeviceBuffer());
if (globals != NULL)
perParticleValueKernel.setArg(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
perParticleValueKernel.setArg(index++, params->getBuffers()[i].getBuffer());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleValueKernel.setArg(index++, computedValues->getBuffers()[i].getBuffer());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleValueKernel.setArg(index++, tabulatedFunctions[i]->getDeviceBuffer());
perParticleValueKernel.setArg(index++, tabulatedFunctionParams->getDeviceBuffer());
}
index = 0;
pairEnergyKernel.setArg(index++, cl.getForceBuffers().getDeviceBuffer());
pairEnergyKernel.setArg(index++, cl.getEnergyBuffer().getDeviceBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
pairEnergyKernel.setArg(index++, cl.getPosq().getDeviceBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
pairEnergyKernel.setArg(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer());
pairEnergyKernel.setArg(index++, cl.getNonbondedUtilities().getExclusionIndices().getDeviceBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
if (nb.getUseCutoff()) {
pairEnergyKernel.setArg(index++, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg