/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "OpenCLKernels.h"
#include "OpenCLForceInfo.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/Context.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCentroidBondForceImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
#include "openmm/internal/CustomNonbondedForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/OSRngSeed.h"
#include "OpenCLBondedUtilities.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLKernelSources.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include "jama_eig.h"
#include
#include
#include
using namespace OpenMM;
using namespace std;
using namespace Lepton;
static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseMixedPrecision())
kernel.setArg(index, cl.getPosqCorrection().getDeviceBuffer());
else
kernel.setArg(index, NULL);
}
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision())
kernel.setArg(index, cl.getPeriodicBoxSizeDouble());
else
kernel.setArg(index, cl.getPeriodicBoxSize());
}
static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision()) {
kernel.setArg(index++, cl.getPeriodicBoxSizeDouble());
kernel.setArg(index++, cl.getInvPeriodicBoxSizeDouble());
kernel.setArg(index++, cl.getPeriodicBoxVecXDouble());
kernel.setArg(index++, cl.getPeriodicBoxVecYDouble());
kernel.setArg(index, cl.getPeriodicBoxVecZDouble());
}
else {
kernel.setArg(index++, cl.getPeriodicBoxSize());
kernel.setArg(index++, cl.getInvPeriodicBoxSize());
kernel.setArg(index++, cl.getPeriodicBoxVecX());
kernel.setArg(index++, cl.getPeriodicBoxVecY());
kernel.setArg(index, cl.getPeriodicBoxVecZ());
}
}
static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
const Lepton::Operation& op = expression.getRootNode().getOperation();
if (op.getId() != Lepton::Operation::CONSTANT)
return false;
return (dynamic_cast(op).getValue() == 0.0);
}
static bool usesVariable(const Lepton::ExpressionTreeNode& node, const string& variable) {
const Lepton::Operation& op = node.getOperation();
if (op.getId() == Lepton::Operation::VARIABLE && op.getName() == variable)
return true;
for (auto& child : node.getChildren())
if (usesVariable(child, variable))
return true;
return false;
}
static bool usesVariable(const Lepton::ParsedExpression& expression, const string& variable) {
return usesVariable(expression.getRootNode(), variable);
}
static pair makeVariable(const string& name, const string& value) {
return make_pair(ExpressionTreeNode(new Operation::Variable(name)), value);
}
void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cl.setForcesValid(true);
cl.clearAutoclearBuffers();
for (auto computation : cl.getPreComputations())
computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
cl.setComputeForceCount(cl.getComputeForceCount()+1);
nb.prepareInteractions(groups);
map& derivs = cl.getEnergyParamDerivWorkspace();
for (auto& param : context.getParameters())
derivs[param.first] = 0;
}
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cl.getBondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0;
for (auto computation : cl.getPostComputations())
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
cl.reduceForces();
cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy)
sum += cl.reduceEnergy();
if (!cl.getForcesValid())
valid = false;
return sum;
}
void OpenCLUpdateStateDataKernel::initialize(const System& system) {
}
double OpenCLUpdateStateDataKernel::getTime(const ContextImpl& context) const {
return cl.getTime();
}
void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
vector& contexts = cl.getPlatformData().contexts;
for (auto ctx : contexts)
ctx->setTime(time);
}
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector& positions) {
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector posCorrection;
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
else if (cl.getUseMixedPrecision()) {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cl.getPosqCorrection().download(posCorrection);
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
cl.getPlatformData().threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const vector& order = cl.getAtomIndex();
int numParticles = cl.getNumAtoms();
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
for (int i = start; i < end; ++i) {
mm_double4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else if (cl.getUseMixedPrecision()) {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = start; i < end; ++i) {
mm_float4 pos1 = posq[i];
mm_float4 pos2 = posCorrection[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3((double)pos1.x+(double)pos2.x, (double)pos1.y+(double)pos2.y, (double)pos1.z+(double)pos2.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = start; i < end; ++i) {
mm_float4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
});
cl.getPlatformData().threads.waitForThreads();
}
void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vector& positions) {
const vector& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
mm_double4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = p[0];
pos.y = p[1];
pos.z = p[2];
}
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
posq[i] = mm_double4(0.0, 0.0, 0.0, 0.0);
cl.getPosq().upload(posq);
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
mm_float4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = (cl_float) p[0];
pos.y = (cl_float) p[1];
pos.z = (cl_float) p[2];
}
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
posq[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f);
cl.getPosq().upload(posq);
}
if (cl.getUseMixedPrecision()) {
mm_float4* posCorrection = (mm_float4*) cl.getPinnedBuffer();
for (int i = 0; i < numParticles; ++i) {
mm_float4& c = posCorrection[i];
const Vec3& p = positions[order[i]];
c.x = (cl_float) (p[0]-(cl_float)p[0]);
c.y = (cl_float) (p[1]-(cl_float)p[1]);
c.z = (cl_float) (p[2]-(cl_float)p[2]);
c.w = 0;
}
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
posCorrection[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f);
cl.getPosqCorrection().upload(posCorrection);
}
for (auto& offset : cl.getPosCellOffsets())
offset = mm_int4(0, 0, 0, 0);
cl.reorderAtoms();
}
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector& velocities) {
const vector& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles);
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
mm_double4* velm = (mm_double4*) cl.getPinnedBuffer();
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
mm_double4 vel = velm[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
else {
mm_float4* velm = (mm_float4*) cl.getPinnedBuffer();
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
mm_float4 vel = velm[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
}
void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector& velocities) {
const vector& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
mm_double4* velm = (mm_double4*) cl.getPinnedBuffer();
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
mm_double4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
velm[i] = mm_double4(0.0, 0.0, 0.0, 0.0);
cl.getVelm().upload(velm);
}
else {
mm_float4* velm = (mm_float4*) cl.getPinnedBuffer();
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
mm_float4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
velm[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f);
cl.getVelm().upload(velm);
}
}
void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, vector& forces) {
const vector& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
forces.resize(numParticles);
if (cl.getUseDoublePrecision()) {
mm_double4* force = (mm_double4*) cl.getPinnedBuffer();
cl.getForce().download(force);
for (int i = 0; i < numParticles; ++i) {
mm_double4 f = force[i];
forces[order[i]] = Vec3(f.x, f.y, f.z);
}
}
else {
mm_float4* force = (mm_float4*) cl.getPinnedBuffer();
cl.getForce().download(force);
for (int i = 0; i < numParticles; ++i) {
mm_float4 f = force[i];
forces[order[i]] = Vec3(f.x, f.y, f.z);
}
}
}
void OpenCLUpdateStateDataKernel::getEnergyParameterDerivatives(ContextImpl& context, map& derivs) {
const vector& paramDerivNames = cl.getEnergyParamDerivNames();
int numDerivs = paramDerivNames.size();
if (numDerivs == 0)
return;
derivs = cl.getEnergyParamDerivWorkspace();
OpenCLArray& derivArray = cl.getEnergyParamDerivBuffer();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
vector derivBuffers;
derivArray.download(derivBuffers);
for (int i = numDerivs; i < derivArray.getSize(); i += numDerivs)
for (int j = 0; j < numDerivs; j++)
derivBuffers[j] += derivBuffers[i+j];
for (int i = 0; i < numDerivs; i++)
derivs[paramDerivNames[i]] += derivBuffers[i];
}
else {
vector derivBuffers;
derivArray.download(derivBuffers);
for (int i = numDerivs; i < derivArray.getSize(); i += numDerivs)
for (int j = 0; j < numDerivs; j++)
derivBuffers[j] += derivBuffers[i+j];
for (int i = 0; i < numDerivs; i++)
derivs[paramDerivNames[i]] += derivBuffers[i];
}
}
void OpenCLUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
cl.getPeriodicBoxVectors(a, b, c);
}
void OpenCLUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) {
vector& contexts = cl.getPlatformData().contexts;
// If any particles have been wrapped to the first periodic box, we need to unwrap them
// to avoid changing their positions.
vector positions;
for (auto offset : cl.getPosCellOffsets()) {
if (offset.x != 0 || offset.y != 0 || offset.z != 0) {
getPositions(context, positions);
break;
}
}
// Update the vectors.
for (auto ctx : contexts)
ctx->setPeriodicBoxVectors(a, b, c);
if (positions.size() > 0)
setPositions(context, positions);
}
void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
int version = 2;
stream.write((char*) &version, sizeof(int));
int precision = (cl.getUseDoublePrecision() ? 2 : cl.getUseMixedPrecision() ? 1 : 0);
stream.write((char*) &precision, sizeof(int));
double time = cl.getTime();
stream.write((char*) &time, sizeof(double));
int stepCount = cl.getStepCount();
stream.write((char*) &stepCount, sizeof(int));
int stepsSinceReorder = cl.getStepsSinceReorder();
stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cl.getPinnedBuffer();
cl.getPosq().download(buffer);
stream.write(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize());
if (cl.getUseMixedPrecision()) {
cl.getPosqCorrection().download(buffer);
stream.write(buffer, cl.getPosqCorrection().getSize()*cl.getPosqCorrection().getElementSize());
}
cl.getVelm().download(buffer);
stream.write(buffer, cl.getVelm().getSize()*cl.getVelm().getElementSize());
stream.write((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size());
stream.write((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size());
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
stream.write((char*) boxVectors, 3*sizeof(Vec3));
cl.getIntegrationUtilities().createCheckpoint(stream);
SimTKOpenMMUtilities::createCheckpoint(stream);
}
void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
int version;
stream.read((char*) &version, sizeof(int));
if (version != 2)
throw OpenMMException("Checkpoint was created with a different version of OpenMM");
int precision;
stream.read((char*) &precision, sizeof(int));
int expectedPrecision = (cl.getUseDoublePrecision() ? 2 : cl.getUseMixedPrecision() ? 1 : 0);
if (precision != expectedPrecision)
throw OpenMMException("Checkpoint was created with a different numeric precision");
double time;
stream.read((char*) &time, sizeof(double));
int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &stepsSinceReorder, sizeof(int));
vector& contexts = cl.getPlatformData().contexts;
for (auto ctx : contexts) {
ctx->setTime(time);
ctx->setStepCount(stepCount);
ctx->setStepsSinceReorder(stepsSinceReorder);
}
char* buffer = (char*) cl.getPinnedBuffer();
stream.read(buffer, cl.getPosq().getSize()*cl.getPosq().getElementSize());
cl.getPosq().upload(buffer);
if (cl.getUseMixedPrecision()) {
stream.read(buffer, cl.getPosqCorrection().getSize()*cl.getPosqCorrection().getElementSize());
cl.getPosqCorrection().upload(buffer);
}
stream.read(buffer, cl.getVelm().getSize()*cl.getVelm().getElementSize());
cl.getVelm().upload(buffer);
stream.read((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size());
cl.getAtomIndexArray().upload(cl.getAtomIndex());
stream.read((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size());
Vec3 boxVectors[3];
stream.read((char*) &boxVectors, 3*sizeof(Vec3));
for (auto ctx : contexts)
ctx->setPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
cl.getIntegrationUtilities().loadCheckpoint(stream);
SimTKOpenMMUtilities::loadCheckpoint(stream);
for (auto listener : cl.getReorderListeners())
listener->execute();
}
void OpenCLApplyConstraintsKernel::initialize(const System& system) {
}
void OpenCLApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
map defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(OpenCLKernelSources::constraints, defines);
applyDeltasKernel = cl::Kernel(program, "applyPositionDeltas");
applyDeltasKernel.setArg(0, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, applyDeltasKernel, 1);
applyDeltasKernel.setArg(2, cl.getIntegrationUtilities().getPosDelta().getDeviceBuffer());
}
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
cl.clearBuffer(integration.getPosDelta());
integration.applyConstraints(tol);
cl.executeKernel(applyDeltasKernel, cl.getNumAtoms());
integration.computeVirtualSites();
}
void OpenCLApplyConstraintsKernel::applyToVelocities(ContextImpl& context, double tol) {
cl.getIntegrationUtilities().applyVelocityConstraints(tol);
}
void OpenCLVirtualSitesKernel::initialize(const System& system) {
}
void OpenCLVirtualSitesKernel::computePositions(ContextImpl& context) {
cl.getIntegrationUtilities().computeVirtualSites();
}
class OpenCLCalcHarmonicBondForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(const HarmonicBondForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double length, k;
force.getBondParameters(index, particle1, particle2, length, k);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double length1, length2, k1, k2;
force.getBondParameters(group1, particle1, particle2, length1, k1);
force.getBondParameters(group2, particle1, particle2, length2, k2);
return (length1 == length2 && k1 == k2);
}
private:
const HarmonicBondForce& force;
};
OpenCLCalcHarmonicBondForceKernel::~OpenCLCalcHarmonicBondForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
vector > atoms(numBonds, vector(2));
params = OpenCLArray::create(cl, numBonds, "bondParams");
vector paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
}
params->upload(paramVector);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicBondForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params->getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
}
double OpenCLCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void OpenCLCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumBonds()/numContexts;
if (numBonds != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
if (numBonds == 0)
return;
// Record the per-bond parameters.
vector paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
int atom1, atom2;
double length, k;
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = mm_float2((cl_float) length, (cl_float) k);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules(info);
}
class OpenCLCalcCustomBondForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(const CustomBondForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
vector parameters;
force.getBondParameters(index, particle1, particle2, parameters);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
vector parameters1, parameters2;
force.getBondParameters(group1, particle1, particle2, parameters1);
force.getBondParameters(group2, particle1, particle2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomBondForce& force;
};
OpenCLCalcCustomBondForceKernel::~OpenCLCalcCustomBondForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
vector > atoms(numBonds, vector(2));
params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customBondParams");
vector > paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
vector parameters;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
}
params->setParameterValues(paramVector);
info = new ForceInfo(force);
cl.addForce(info);
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["real dEdR = "] = forceExpression;
// Create the kernels.
map variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create(cl, force.getNumGlobalParameters(), "customBondGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
variables[name] = value;
}
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string paramName = force.getEnergyParameterDerivativeName(i);
string derivVariable = cl.getBondedUtilities().addEnergyParameterDerivative(paramName);
Lepton::ParsedExpression derivExpression = energyExpression.differentiate(paramName).optimize();
expressions[derivVariable+" += "] = derivExpression;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cl.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< functions;
vector > functionNames;
compute << cl.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp");
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = compute.str();
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::bondForce, replacements), force.getForceGroup());
}
double OpenCLCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void OpenCLCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumBonds()/numContexts;
if (numBonds != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
if (numBonds == 0)
return;
// Record the per-bond parameters.
vector > paramVector(numBonds);
vector parameters;
for (int i = 0; i < numBonds; i++) {
int atom1, atom2;
force.getBondParameters(startIndex+i, atom1, atom2, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules(info);
}
class OpenCLCalcHarmonicAngleForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(const HarmonicAngleForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(index, particle1, particle2, particle3, angle, k);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
double angle1, angle2, k1, k2;
force.getAngleParameters(group1, particle1, particle2, particle3, angle1, k1);
force.getAngleParameters(group2, particle1, particle2, particle3, angle2, k2);
return (angle1 == angle2 && k1 == k2);
}
private:
const HarmonicAngleForce& force;
};
OpenCLCalcHarmonicAngleForceKernel::~OpenCLCalcHarmonicAngleForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
vector > atoms(numAngles, vector(3));
params = OpenCLArray::create(cl, numAngles, "angleParams");
vector paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
double angle, k;
force.getAngleParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
}
params->upload(paramVector);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params->getDeviceBuffer(), "float2");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
}
double OpenCLCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void OpenCLCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
if (numAngles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of angles has changed");
if (numAngles == 0)
return;
// Record the per-angle parameters.
vector paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
int atom1, atom2, atom3;
double angle, k;
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = mm_float2((cl_float) angle, (cl_float) k);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules(info);
}
class OpenCLCalcCustomAngleForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(const CustomAngleForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3;
vector parameters;
force.getAngleParameters(index, particle1, particle2, particle3, parameters);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
vector parameters1, parameters2;
force.getAngleParameters(group1, particle1, particle2, particle3, parameters1);
force.getAngleParameters(group2, particle1, particle2, particle3, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomAngleForce& force;
};
OpenCLCalcCustomAngleForceKernel::~OpenCLCalcCustomAngleForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
vector > atoms(numAngles, vector(3));
params = new OpenCLParameterSet(cl, force.getNumPerAngleParameters(), numAngles, "customAngleParams");
vector > paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
vector parameters;
force.getAngleParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
}
params->setParameterValues(paramVector);
info = new ForceInfo(force);
cl.addForce(info);
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["real dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerAngleParameters(); i++) {
const string& name = force.getPerAngleParameterName(i);
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create(cl, force.getNumGlobalParameters(), "customAngleGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
variables[name] = value;
}
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string paramName = force.getEnergyParameterDerivativeName(i);
string derivVariable = cl.getBondedUtilities().addEnergyParameterDerivative(paramName);
Lepton::ParsedExpression derivExpression = energyExpression.differentiate(paramName).optimize();
expressions[derivVariable+" += "] = derivExpression;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cl.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< functions;
vector > functionNames;
compute << cl.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp");
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = compute.str();
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::angleForce, replacements), force.getForceGroup());
}
double OpenCLCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void OpenCLCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumAngles()/numContexts;
if (numAngles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of angles has changed");
if (numAngles == 0)
return;
// Record the per-angle parameters.
vector > paramVector(numAngles);
vector parameters;
for (int i = 0; i < numAngles; i++) {
int atom1, atom2, atom3;
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules(info);
}
class OpenCLCalcPeriodicTorsionForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(const PeriodicTorsionForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, periodicity, phase, k);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4, periodicity1, periodicity2;
double phase1, phase2, k1, k2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, periodicity1, phase1, k1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, periodicity2, phase2, k2);
return (periodicity1 == periodicity2 && phase1 == phase2 && k1 == k2);
}
private:
const PeriodicTorsionForce& force;
};
OpenCLCalcPeriodicTorsionForceKernel::~OpenCLCalcPeriodicTorsionForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params = OpenCLArray::create(cl, numTorsions, "periodicTorsionParams");
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
}
params->upload(paramVector);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params->getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
}
double OpenCLCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void OpenCLCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
if (numTorsions == 0)
return;
// Record the per-torsion parameters.
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4, periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = mm_float4((cl_float) k, (cl_float) phase, (cl_float) periodicity, 0.0f);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules(info);
}
class OpenCLCalcRBTorsionForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(const RBTorsionForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
double c0a, c0b, c1a, c1b, c2a, c2b, c3a, c3b, c4a, c4b, c5a, c5b;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, c0a, c1a, c2a, c3a, c4a, c5a);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, c0b, c1b, c2b, c3b, c4b, c5b);
return (c0a == c0b && c1a == c1b && c2a == c2b && c3a == c3b && c4a == c4b && c5a == c5b);
}
private:
const RBTorsionForce& force;
};
OpenCLCalcRBTorsionForceKernel::~OpenCLCalcRBTorsionForceKernel() {
if (params != NULL)
delete params;
}
void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params = OpenCLArray::create(cl, numTorsions, "rbTorsionParams");
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
}
params->upload(paramVector);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = OpenCLKernelSources::rbTorsionForce;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(params->getDeviceBuffer(), "float8");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
}
double OpenCLCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void OpenCLCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
if (numTorsions == 0)
return;
// Record the per-torsion parameters.
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
paramVector[i] = mm_float8((cl_float) c0, (cl_float) c1, (cl_float) c2, (cl_float) c3, (cl_float) c4, (cl_float) c5, 0.0f, 0.0f);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules(info);
}
class OpenCLCalcCMAPTorsionForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(const CMAPTorsionForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int map, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(index, map, a1, a2, a3, a4, b1, b2, b3, b4);
particles.resize(8);
particles[0] = a1;
particles[1] = a2;
particles[2] = a3;
particles[3] = a4;
particles[4] = b1;
particles[5] = b2;
particles[6] = b3;
particles[7] = b4;
}
bool areGroupsIdentical(int group1, int group2) {
int map1, map2, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(group1, map1, a1, a2, a3, a4, b1, b2, b3, b4);
force.getTorsionParameters(group2, map2, a1, a2, a3, a4, b1, b2, b3, b4);
return (map1 == map2);
}
private:
const CMAPTorsionForce& force;
};
OpenCLCalcCMAPTorsionForceKernel::~OpenCLCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
int numMaps = force.getNumMaps();
vector coeffVec;
mapPositionsVec.resize(numMaps);
vector energy;
vector > c;
int currentPosition = 0;
for (int i = 0; i < numMaps; i++) {
int size;
force.getMapParameters(i, size, energy);
CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
mapPositionsVec[i] = mm_int2(currentPosition, size);
currentPosition += 4*size*size;
for (int j = 0; j < size*size; j++) {
coeffVec.push_back(mm_float4((float) c[j][0], (float) c[j][1], (float) c[j][2], (float) c[j][3]));
coeffVec.push_back(mm_float4((float) c[j][4], (float) c[j][5], (float) c[j][6], (float) c[j][7]));
coeffVec.push_back(mm_float4((float) c[j][8], (float) c[j][9], (float) c[j][10], (float) c[j][11]));
coeffVec.push_back(mm_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
vector > atoms(numTorsions, vector(8));
vector torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients = OpenCLArray::create(cl, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = OpenCLArray::create(cl, numMaps, "cmapTorsionMapPositions");
torsionMaps = OpenCLArray::create(cl, numTorsions, "cmapTorsionMaps");
coefficients->upload(coeffVec);
mapPositions->upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cl.getBondedUtilities().addArgument(coefficients->getDeviceBuffer(), "float4");
replacements["MAP_POS"] = cl.getBondedUtilities().addArgument(mapPositions->getDeviceBuffer(), "int2");
replacements["MAPS"] = cl.getBondedUtilities().addArgument(torsionMaps->getDeviceBuffer(), "int");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cl.addForce(info);
}
double OpenCLCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void OpenCLCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force) {
int numMaps = force.getNumMaps();
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (mapPositions->getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps->getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps.
vector coeffVec;
vector energy;
vector > c;
int currentPosition = 0;
for (int i = 0; i < numMaps; i++) {
int size;
force.getMapParameters(i, size, energy);
if (size != mapPositionsVec[i].y)
throw OpenMMException("updateParametersInContext: The size of a map has changed");
CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
currentPosition += 4*size*size;
for (int j = 0; j < size*size; j++) {
coeffVec.push_back(mm_float4((float) c[j][0], (float) c[j][1], (float) c[j][2], (float) c[j][3]));
coeffVec.push_back(mm_float4((float) c[j][4], (float) c[j][5], (float) c[j][6], (float) c[j][7]));
coeffVec.push_back(mm_float4((float) c[j][8], (float) c[j][9], (float) c[j][10], (float) c[j][11]));
coeffVec.push_back(mm_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
coefficients->upload(coeffVec);
// Update the indices.
vector torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
}
torsionMaps->upload(torsionMapsVec);
}
class OpenCLCalcCustomTorsionForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(const CustomTorsionForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4;
vector parameters;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, parameters);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
vector parameters1, parameters2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, parameters1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomTorsionForce& force;
};
OpenCLCalcCustomTorsionForceKernel::~OpenCLCalcCustomTorsionForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params = new OpenCLParameterSet(cl, force.getNumPerTorsionParameters(), numTorsions, "customTorsionParams");
vector > paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
vector parameters;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
}
params->setParameterValues(paramVector);
info = new ForceInfo(force);
cl.addForce(info);
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["real dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerTorsionParameters(); i++) {
const string& name = force.getPerTorsionParameterName(i);
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create(cl, force.getNumGlobalParameters(), "customTorsionGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cl.intToString(i)+"]";
variables[name] = value;
}
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string paramName = force.getEnergyParameterDerivativeName(i);
string derivVariable = cl.getBondedUtilities().addEnergyParameterDerivative(paramName);
Lepton::ParsedExpression derivExpression = energyExpression.differentiate(paramName).optimize();
expressions[derivVariable+" += "] = derivExpression;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cl.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< functions;
vector > functionNames;
compute << cl.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp");
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = compute.str();
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::torsionForce, replacements), force.getForceGroup());
}
double OpenCLCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void OpenCLCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) {
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cl.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
if (numTorsions == 0)
return;
// Record the per-torsion parameters.
vector > paramVector(numTorsions);
vector parameters;
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (cl_float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules(info);
}
class OpenCLCalcNonbondedForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(int requiredBuffers, const NonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
int getNumParticleGroups() {
return force.getNumExceptions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
private:
const NonbondedForce& force;
};
class OpenCLCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public:
PmeIO(OpenCLContext& cl, cl::Kernel addForcesKernel) : cl(cl), addForcesKernel(addForcesKernel), forceTemp(NULL) {
forceTemp = OpenCLArray::create(cl, cl.getNumAtoms(), "PmeForce");
addForcesKernel.setArg(0, forceTemp->getDeviceBuffer());
}
~PmeIO() {
if (forceTemp != NULL)
delete forceTemp;
}
float* getPosq() {
cl.getPosq().download(posq);
return (float*) &posq[0];
}
void setForce(float* force) {
forceTemp->upload(force);
addForcesKernel.setArg(1, cl.getForce().getDeviceBuffer());
cl.executeKernel(addForcesKernel, cl.getNumAtoms());
}
private:
OpenCLContext& cl;
vector posq;
OpenCLArray* forceTemp;
cl::Kernel addForcesKernel;
};
class OpenCLCalcNonbondedForceKernel::PmePreComputation : public OpenCLContext::ForcePreComputation {
public:
PmePreComputation(OpenCLContext& cl, Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : cl(cl), pme(pme), io(io) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
Vec3 boxVectors[3] = {Vec3(cl.getPeriodicBoxSize().x, 0, 0), Vec3(0, cl.getPeriodicBoxSize().y, 0), Vec3(0, 0, cl.getPeriodicBoxSize().z)};
pme.getAs().beginComputation(io, boxVectors, includeEnergy);
}
private:
OpenCLContext& cl;
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class OpenCLCalcNonbondedForceKernel::PmePostComputation : public OpenCLContext::ForcePostComputation {
public:
PmePostComputation(Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : pme(pme), io(io) {
}
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
return pme.getAs().finishComputation(io);
}
private:
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class OpenCLCalcNonbondedForceKernel::SyncQueuePreComputation : public OpenCLContext::ForcePreComputation {
public:
SyncQueuePreComputation(OpenCLContext& cl, cl::CommandQueue queue, int forceGroup) : cl(cl), queue(queue), forceGroup(forceGroup) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1< events(1);
cl.getQueue().enqueueMarker(&events[0]);
queue.enqueueWaitForEvents(events);
}
}
private:
OpenCLContext& cl;
cl::CommandQueue queue;
int forceGroup;
};
class OpenCLCalcNonbondedForceKernel::SyncQueuePostComputation : public OpenCLContext::ForcePostComputation {
public:
SyncQueuePostComputation(OpenCLContext& cl, cl::Event& event, OpenCLArray& pmeEnergyBuffer, int forceGroup) : cl(cl), event(event),
pmeEnergyBuffer(pmeEnergyBuffer), forceGroup(forceGroup) {
}
void setKernel(cl::Kernel kernel) {
addEnergyKernel = kernel;
addEnergyKernel.setArg(0, pmeEnergyBuffer.getDeviceBuffer());
addEnergyKernel.setArg(1, cl.getEnergyBuffer().getDeviceBuffer());
addEnergyKernel.setArg(2, pmeEnergyBuffer.getSize());
}
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1< events(1);
events[0] = event;
event = cl::Event();
cl.getQueue().enqueueWaitForEvents(events);
if (includeEnergy)
cl.executeKernel(addEnergyKernel, pmeEnergyBuffer.getSize());
}
return 0.0;
}
private:
OpenCLContext& cl;
cl::Event& event;
cl::Kernel addEnergyKernel;
OpenCLArray& pmeEnergyBuffer;
int forceGroup;
};
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (pmeGrid != NULL)
delete pmeGrid;
if (pmeGrid2 != NULL)
delete pmeGrid2;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeBsplineTheta != NULL)
delete pmeBsplineTheta;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL)
delete sort;
if (fft != NULL)
delete fft;
if (dispersionFft != NULL)
delete dispersionFft;
if (pmeio != NULL)
delete pmeio;
}
void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
// Identify which exceptions are 1-4 interactions.
vector > exclusions;
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0)
exceptions.push_back(i);
}
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
sigmaEpsilon = OpenCLArray::create(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
vector posqf(cl.getPaddedNumAtoms(), mm_float4(0,0,0,0));
vector posqd(cl.getPaddedNumAtoms(), mm_double4(0,0,0,0));
vector sigmaEpsilonVector(cl.getPaddedNumAtoms(), mm_float2(0,0));
vector > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
if (cl.getUseDoublePrecision())
posqd[i] = mm_double4(0, 0, 0, charge);
else
posqf[i] = mm_float4(0, 0, 0, (float) charge);
double sig = 0.5*sigma;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = mm_float2((float) sig, (float) eps);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (auto exclusion : exclusions) {
exclusionList[exclusion.first].push_back(exclusion.second);
exclusionList[exclusion.second].push_back(exclusion.first);
}
if (cl.getUseDoublePrecision())
cl.getPosq().upload(posqd);
else
cl.getPosq().upload(posqf);
sigmaEpsilon->upload(sigmaEpsilonVector);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME);
map defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
defines["USE_LJ_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
if (useCutoff) {
// Compute the reaction field constants.
double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
defines["REACTION_FIELD_K"] = cl.doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = cl.doubleToString(reactionFieldC);
// Compute the switching coefficients.
if (force.getUseSwitchingFunction()) {
defines["LJ_SWITCH_CUTOFF"] = cl.doubleToString(force.getSwitchingDistance());
defines["LJ_SWITCH_C3"] = cl.doubleToString(10/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 3.0));
defines["LJ_SWITCH_C4"] = cl.doubleToString(15/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 4.0));
defines["LJ_SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0 && !doLJPME)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
if (cl.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
// Create the reciprocal space kernels.
map replacements;
replacements["NUM_ATOMS"] = cl.intToString(numParticles);
replacements["KMAX_X"] = cl.intToString(kmaxx);
replacements["KMAX_Y"] = cl.intToString(kmaxy);
replacements["KMAX_Z"] = cl.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cl.doubleToString(-1.0/(4.0*alpha*alpha));
cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
// Compute the PME parameters.
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
gridSizeY = OpenCLFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = OpenCLFFT3D::findLegalDimension(gridSizeZ);
if (doLJPME) {
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = OpenCLFFT3D::findLegalDimension(dispersionGridSizeX);
dispersionGridSizeY = OpenCLFFT3D::findLegalDimension(dispersionGridSizeY);
dispersionGridSizeZ = OpenCLFFT3D::findLegalDimension(dispersionGridSizeZ);
}
defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
defines["EWALD_DISPERSION_ALPHA"] = cl.doubleToString(dispersionAlpha);
if (cl.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += pow(dispersionAlpha, 6)*sumSquaredC6/12.0;
pmeDefines["PME_ORDER"] = cl.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cl.intToString(numParticles);
pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = cl.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = cl.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cl.intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = cl.doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["M_PI"] = cl.doubleToString(M_PI);
bool deviceIsCpu = (cl.getDevice().getInfo() == CL_DEVICE_TYPE_CPU);
if (deviceIsCpu)
pmeDefines["DEVICE_IS_CPU"] = "1";
if (cl.getPlatformData().useCpuPme && !doLJPME) {
// Create the CPU PME kernel.
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
cpuPme.getAs().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha, false);
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
pmeio = new PmeIO(cl, addForcesKernel);
cl.addPreComputation(new PmePreComputation(cl, cpuPme, *pmeio));
cl.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
}
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
}
}
if (pmeio == NULL) {
// Create required data structures.
if (doLJPME) {
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cl.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cl.doubleToString(multShift6);
}
int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
pmeGrid = new OpenCLArray(cl, gridElements, 2*elementSize, "pmeGrid");
pmeGrid2 = new OpenCLArray(cl, gridElements, 2*elementSize, "pmeGrid2");
if (cl.getSupports64BitGlobalAtomics())
cl.addAutoclearBuffer(*pmeGrid2);
else
cl.addAutoclearBuffer(*pmeGrid);
pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX = new OpenCLArray(cl, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY = new OpenCLArray(cl, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ = new OpenCLArray(cl, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange = OpenCLArray::create(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = OpenCLArray::create(cl, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer = new OpenCLArray(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cl.clearBuffer(*pmeEnergyBuffer);
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME)
dispersionFft = new OpenCLFFT3D(cl, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
string vendor = cl.getDevice().getInfo();
bool isNvidia = (vendor.size() >= 6 && vendor.substr(0, 6) == "NVIDIA");
if (isNvidia)
pmeDefines["USE_ALTERNATE_MEMORY_ACCESS_PATTERN"] = "1";
usePmeQueue = (!cl.getPlatformData().disablePmeStream && isNvidia);
if (usePmeQueue) {
pmeDefines["USE_PME_STREAM"] = "1";
pmeQueue = cl::CommandQueue(cl.getContext(), cl.getDevice());
int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue, recipForceGroup));
cl.addPostComputation(syncQueue = new SyncQueuePostComputation(cl, pmeSyncEvent, *pmeEnergyBuffer, recipForceGroup));
}
// Initialize the b-spline moduli.
for (int grid = 0; grid < 2; grid++) {
int xsize, ysize, zsize;
OpenCLArray *xmoduli, *ymoduli, *zmoduli;
if (grid == 0) {
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ;
}
else {
if (!doLJPME)
continue;
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ;
}
int maxSize = max(max(xsize, ysize), zsize);
vector data(PmeOrder);
vector ddata(PmeOrder);
vector bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = (float) (sc*sc+ss*ss);
}
for (int i = 0; i < ndata; i++)
{
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
}
if (cl.getUseDoublePrecision()) {
if (dim == 0)
xmoduli->upload(moduli);
else if (dim == 1)
ymoduli->upload(moduli);
else
zmoduli->upload(moduli);
}
else {
vector modulif(ndata);
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
xmoduli->upload(modulif);
else if (dim == 1)
ymoduli->upload(modulif);
else
zmoduli->upload(modulif);
}
}
}
}
}
}
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
if (hasLJ)
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon->getDeviceBuffer()));
// Initialize the exceptions.
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector > atoms(numExceptions, vector(2));
exceptionParams = OpenCLArray::create(cl, numExceptions, "exceptionParams");
vector exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams->upload(exceptionParamsVector);
map replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams->getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
cl.addForce(info);
}
double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
bool deviceIsCpu = (cl.getDevice().getInfo() == CL_DEVICE_TYPE_CPU);
if (!hasInitializedKernel) {
hasInitializedKernel = true;
if (cosSinSums != NULL) {
ewaldSumsKernel.setArg(0, cl.getEnergyBuffer().getDeviceBuffer());
ewaldSumsKernel.setArg(1, cl.getPosq().getDeviceBuffer());
ewaldSumsKernel.setArg(2, cosSinSums->getDeviceBuffer());
ewaldForcesKernel.setArg(0, cl.getForceBuffers().getDeviceBuffer());
ewaldForcesKernel.setArg(1, cl.getPosq().getDeviceBuffer());
ewaldForcesKernel.setArg(2, cosSinSums->getDeviceBuffer());
}
if (pmeGrid != NULL) {
// Create kernels for Coulomb PME.
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
pmeUpdateBsplinesKernel = cl::Kernel(program, "updateBsplines");
pmeAtomRangeKernel = cl::Kernel(program, "findAtomRangeForGrid");
pmeZIndexKernel = cl::Kernel(program, "recordZIndex");
pmeSpreadChargeKernel = cl::Kernel(program, "gridSpreadCharge");
pmeConvolutionKernel = cl::Kernel(program, "reciprocalConvolution");
pmeEvalEnergyKernel = cl::Kernel(program, "gridEvaluateEnergy");
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeUpdateBsplinesKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(1, pmeBsplineTheta->getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeUpdateBsplinesKernel.setArg(3, pmeAtomGridIndex->getDeviceBuffer());
pmeAtomRangeKernel.setArg(0, pmeAtomGridIndex->getDeviceBuffer());
pmeAtomRangeKernel.setArg(1, pmeAtomRange->getDeviceBuffer());
pmeAtomRangeKernel.setArg(2, cl.getPosq().getDeviceBuffer());
pmeZIndexKernel.setArg(0, pmeAtomGridIndex->getDeviceBuffer());
pmeZIndexKernel.setArg(1, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg(1, pmeAtomGridIndex->getDeviceBuffer());
pmeSpreadChargeKernel.setArg(2, pmeAtomRange->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics())
pmeSpreadChargeKernel.setArg(3, pmeGrid2->getDeviceBuffer());
else
pmeSpreadChargeKernel.setArg(3, pmeGrid->getDeviceBuffer());
pmeSpreadChargeKernel.setArg(4, pmeBsplineTheta->getDeviceBuffer());
pmeConvolutionKernel.setArg(0, pmeGrid2->getDeviceBuffer());
pmeConvolutionKernel.setArg(1, pmeBsplineModuliX->getDeviceBuffer());
pmeConvolutionKernel.setArg(2, pmeBsplineModuliY->getDeviceBuffer());
pmeConvolutionKernel.setArg(3, pmeBsplineModuliZ->getDeviceBuffer());
pmeEvalEnergyKernel.setArg(0, pmeGrid2->getDeviceBuffer());
pmeEvalEnergyKernel.setArg(1, usePmeQueue ? pmeEnergyBuffer->getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeEvalEnergyKernel.setArg(2, pmeBsplineModuliX->getDeviceBuffer());
pmeEvalEnergyKernel.setArg(3, pmeBsplineModuliY->getDeviceBuffer());
pmeEvalEnergyKernel.setArg(4, pmeBsplineModuliZ->getDeviceBuffer());
pmeInterpolateForceKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeInterpolateForceKernel.setArg(1, cl.getForceBuffers().getDeviceBuffer());
pmeInterpolateForceKernel.setArg(2, pmeGrid->getDeviceBuffer());
pmeInterpolateForceKernel.setArg(11, pmeAtomGridIndex->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
pmeFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeFinishSpreadChargeKernel.setArg(0, pmeGrid2->getDeviceBuffer());
pmeFinishSpreadChargeKernel.setArg(1, pmeGrid->getDeviceBuffer());
}
if (usePmeQueue)
syncQueue->setKernel(cl::Kernel(program, "addEnergy"));
if (doLJPME) {
// Create kernels for LJ PME.
pmeDefines["EWALD_ALPHA"] = cl.doubleToString(dispersionAlpha);
pmeDefines["GRID_SIZE_X"] = cl.intToString(dispersionGridSizeX);
pmeDefines["GRID_SIZE_Y"] = cl.intToString(dispersionGridSizeY);
pmeDefines["GRID_SIZE_Z"] = cl.intToString(dispersionGridSizeZ);
pmeDefines["EPSILON_FACTOR"] = "1";
pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1";
program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
pmeDispersionUpdateBsplinesKernel = cl::Kernel(program, "updateBsplines");
pmeDispersionAtomRangeKernel = cl::Kernel(program, "findAtomRangeForGrid");
pmeDispersionZIndexKernel = cl::Kernel(program, "recordZIndex");
pmeDispersionSpreadChargeKernel = cl::Kernel(program, "gridSpreadCharge");
pmeDispersionConvolutionKernel = cl::Kernel(program, "reciprocalConvolution");
pmeDispersionEvalEnergyKernel = cl::Kernel(program, "gridEvaluateEnergy");
pmeDispersionInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
pmeDispersionUpdateBsplinesKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg(1, pmeBsplineTheta->getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg(2, OpenCLContext::ThreadBlockSize*PmeOrder*elementSize, NULL);
pmeDispersionUpdateBsplinesKernel.setArg(3, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionUpdateBsplinesKernel.setArg(12, sigmaEpsilon->getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg(0, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg(1, pmeAtomRange->getDeviceBuffer());
pmeDispersionAtomRangeKernel.setArg(2, cl.getPosq().getDeviceBuffer());
pmeDispersionZIndexKernel.setArg(0, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionZIndexKernel.setArg(1, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg(1, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg(2, pmeAtomRange->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg(3, pmeGrid2->getDeviceBuffer());
else
pmeDispersionSpreadChargeKernel.setArg(3, pmeGrid->getDeviceBuffer());
pmeDispersionSpreadChargeKernel.setArg(4, pmeBsplineTheta->getDeviceBuffer());
if (deviceIsCpu || cl.getSupports64BitGlobalAtomics())
pmeDispersionSpreadChargeKernel.setArg(13, sigmaEpsilon->getDeviceBuffer());
else
pmeDispersionSpreadChargeKernel.setArg(5, sigmaEpsilon->getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg(0, pmeGrid2->getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg(1, pmeDispersionBsplineModuliX->getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg(2, pmeDispersionBsplineModuliY->getDeviceBuffer());
pmeDispersionConvolutionKernel.setArg(3, pmeDispersionBsplineModuliZ->getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg(0, pmeGrid2->getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg(1, usePmeQueue ? pmeEnergyBuffer->getDeviceBuffer() : cl.getEnergyBuffer().getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg(2, pmeDispersionBsplineModuliX->getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg(3, pmeDispersionBsplineModuliY->getDeviceBuffer());
pmeDispersionEvalEnergyKernel.setArg(4, pmeDispersionBsplineModuliZ->getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg(0, cl.getPosq().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg(1, cl.getForceBuffers().getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg(2, pmeGrid->getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg(11, pmeAtomGridIndex->getDeviceBuffer());
pmeDispersionInterpolateForceKernel.setArg(12, sigmaEpsilon->getDeviceBuffer());
if (cl.getSupports64BitGlobalAtomics()) {
pmeDispersionFinishSpreadChargeKernel = cl::Kernel(program, "finishSpreadCharge");
pmeDispersionFinishSpreadChargeKernel.setArg(0, pmeGrid2->getDeviceBuffer());
pmeDispersionFinishSpreadChargeKernel.setArg(1, pmeGrid->getDeviceBuffer());
}
}
}
}
if (cosSinSums != NULL && includeReciprocal) {
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 recipBoxSize = mm_double4(2*M_PI/boxSize.x, 2*M_PI/boxSize.y, 2*M_PI/boxSize.z, 0.0);
double recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
if (cl.getUseDoublePrecision()) {
ewaldSumsKernel.setArg(3, recipBoxSize);
ewaldSumsKernel.setArg(4, recipCoefficient);
ewaldForcesKernel.setArg(3, recipBoxSize);
ewaldForcesKernel.setArg(4, recipCoefficient);
}
else {
ewaldSumsKernel.setArg(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldSumsKernel.setArg(4, (cl_float) recipCoefficient);
ewaldForcesKernel.setArg(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldForcesKernel.setArg(4, (cl_float) recipCoefficient);
}
cl.executeKernel(ewaldSumsKernel, cosSinSums->getSize());
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
}
if (pmeGrid != NULL && includeReciprocal) {
if (usePmeQueue && !includeEnergy)
cl.setQueue(pmeQueue);
// Invert the periodic box vectors.
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
double scale = 1.0/determinant;
mm_double4 recipBoxVectors[3];
recipBoxVectors[0] = mm_double4(boxVectors[1][1]*boxVectors[2][2]*scale, 0, 0, 0);
recipBoxVectors[1] = mm_double4(-boxVectors[1][0]*boxVectors[2][2]*scale, boxVectors[0][0]*boxVectors[2][2]*scale, 0, 0);
recipBoxVectors[2] = mm_double4((boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0])*scale, -boxVectors[0][0]*boxVectors[2][1]*scale, boxVectors[0][0]*boxVectors[1][1]*scale, 0);
mm_float4 recipBoxVectorsFloat[3];
for (int i = 0; i < 3; i++)
recipBoxVectorsFloat[i] = mm_float4((float) recipBoxVectors[i].x, (float) recipBoxVectors[i].y, (float) recipBoxVectors[i].z, 0);
// Execute the reciprocal space kernels.
setPeriodicBoxArgs(cl, pmeUpdateBsplinesKernel, 4);
if (cl.getUseDoublePrecision()) {
pmeUpdateBsplinesKernel.setArg(9, recipBoxVectors[0]);
pmeUpdateBsplinesKernel.setArg(10, recipBoxVectors[1]);
pmeUpdateBsplinesKernel.setArg(11, recipBoxVectors[2]);
}
else {
pmeUpdateBsplinesKernel.setArg(9, recipBoxVectorsFloat[0]);
pmeUpdateBsplinesKernel.setArg(10, recipBoxVectorsFloat[1]);
pmeUpdateBsplinesKernel.setArg(11, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg(12, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg