/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "CudaKernels.h"
#include "CudaForceInfo.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/Context.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaIntegrationUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "CudaKernelSources.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "../src/SimTKUtilities/SimTKOpenMMUtilities.h"
#include
#include
using namespace OpenMM;
using namespace std;
using Lepton::ExpressionTreeNode;
using Lepton::Operation;
static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
const Lepton::Operation& op = expression.getRootNode().getOperation();
if (op.getId() != Lepton::Operation::CONSTANT)
return false;
return (dynamic_cast(op).getValue() == 0.0);
}
static bool usesVariable(const Lepton::ExpressionTreeNode& node, const string& variable) {
const Lepton::Operation& op = node.getOperation();
if (op.getId() == Lepton::Operation::VARIABLE && op.getName() == variable)
return true;
for (int i = 0; i < (int) node.getChildren().size(); i++)
if (usesVariable(node.getChildren()[i], variable))
return true;
return false;
}
static bool usesVariable(const Lepton::ParsedExpression& expression, const string& variable) {
return usesVariable(expression.getRootNode(), variable);
}
static pair makeVariable(const string& name, const string& value) {
return make_pair(ExpressionTreeNode(new Operation::Variable(name)), value);
}
void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cu.setAsCurrent();
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setTime(time);
}
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector& positions) {
cu.setAsCurrent();
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
double4 pos = posq[i];
int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
}
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
float4 pos = posq[i];
int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
}
}
}
void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector& positions) {
cu.setAsCurrent();
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
double4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = p[0];
pos.y = p[1];
pos.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
posq[i] = make_double4(0.0, 0.0, 0.0, 0.0);
cu.getPosq().upload(posq);
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
float4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = p[0];
pos.y = p[1];
pos.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
posq[i] = make_float4(0.0, 0.0, 0.0, 0.0);
cu.getPosq().upload(posq);
}
for (int i = 0; i < (int) cu.getPosCellOffsets().size(); i++)
cu.getPosCellOffsets()[i] = make_int4(0, 0, 0, 0);
}
void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector& velocities) {
cu.setAsCurrent();
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles);
if (cu.getUseDoublePrecision()) {
double4* velm = (double4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
double4 vel = velm[i];
int4 offset = cu.getPosCellOffsets()[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
else {
float4* velm = (float4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
float4 vel = velm[i];
int4 offset = cu.getPosCellOffsets()[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
}
void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector& velocities) {
cu.setAsCurrent();
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) {
double4* velm = (double4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
double4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
velm[i] = make_double4(0.0, 0.0, 0.0, 0.0);
cu.getVelm().upload(velm);
}
else {
float4* velm = (float4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
float4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
velm[i] = make_float4(0.0, 0.0, 0.0, 0.0);
cu.getVelm().upload(velm);
}
}
void CudaUpdateStateDataKernel::getForces(ContextImpl& context, vector& forces) {
cu.setAsCurrent();
long long* force = (long long*) cu.getPinnedBuffer();
cu.getForce().download(force);
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
int paddedNumParticles = cu.getPaddedNumAtoms();
forces.resize(numParticles);
double scale = 1.0/(double) 0xFFFFFFFF;
for (int i = 0; i < numParticles; ++i)
forces[order[i]] = Vec3(scale*force[i], scale*force[i+paddedNumParticles], scale*force[i+paddedNumParticles*2]);
}
void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
double4 box = cu.getPeriodicBoxSize();
a = Vec3(box.x, 0, 0);
b = Vec3(0, box.y, 0);
c = Vec3(0, 0, box.z);
}
void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
vector& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(a[0], b[1], c[2]);
}
void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cu.setAsCurrent();
int version = 1;
stream.write((char*) &version, sizeof(int));
double time = cu.getTime();
stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cu.getComputeForceCount();
stream.write((char*) &computeForceCount, sizeof(int));
int bufferSize = cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
char* buffer = (char*) cu.getPinnedBuffer();
cu.getPosq().download(buffer);
stream.write(buffer, bufferSize);
cu.getVelm().download(buffer);
stream.write(buffer, bufferSize);
stream.write((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
stream.write((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());
double4 box = cu.getPeriodicBoxSize();
stream.write((char*) &box, sizeof(double4));
cu.getIntegrationUtilities().createCheckpoint(stream);
SimTKOpenMMUtilities::createCheckpoint(stream);
}
void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cu.setAsCurrent();
int version;
stream.read((char*) &version, sizeof(int));
if (version != 1)
throw OpenMMException("Checkpoint was created with a different version of OpenMM");
double time;
stream.read((char*) &time, sizeof(double));
int stepCount, computeForceCount;
stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &computeForceCount, sizeof(int));
vector& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++) {
contexts[i]->setTime(time);
contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount);
}
int bufferSize = cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
char* buffer = (char*) cu.getPinnedBuffer();
stream.read(buffer, bufferSize);
cu.getPosq().upload(buffer);
stream.read(buffer, bufferSize);
cu.getVelm().upload(buffer);
stream.read((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
cu.getAtomIndexArray().upload(cu.getAtomIndex());
stream.read((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());
double4 box;
stream.read((char*) &box, sizeof(double4));
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z);
cu.getIntegrationUtilities().loadCheckpoint(stream);
SimTKOpenMMUtilities::loadCheckpoint(stream);
for (int i = 0; i < cu.getReorderListeners().size(); i++)
cu.getReorderListeners()[i]->execute();
}
void CudaApplyConstraintsKernel::initialize(const System& system) {
}
void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
map defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
CUmodule module = cu.createModule(CudaKernelSources::constraints, defines);
applyDeltasKernel = cu.getKernel(module, "applyPositionDeltas");
}
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
cu.clearBuffer(integration.getPosDelta());
integration.applyConstraints(tol);
void* args[] = {&cu.getPosq().getDevicePointer(), &cu.getIntegrationUtilities().getPosDelta().getDevicePointer()};
cu.executeKernel(applyDeltasKernel, args, cu.getNumAtoms());
integration.computeVirtualSites();
}
void CudaVirtualSitesKernel::initialize(const System& system) {
}
void CudaVirtualSitesKernel::computePositions(ContextImpl& context) {
cu.getIntegrationUtilities().computeVirtualSites();
}
class CudaHarmonicBondForceInfo : public CudaForceInfo {
public:
CudaHarmonicBondForceInfo(const HarmonicBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double length, k;
force.getBondParameters(index, particle1, particle2, length, k);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double length1, length2, k1, k2;
force.getBondParameters(group1, particle1, particle2, length1, k1);
force.getBondParameters(group2, particle1, particle2, length2, k2);
return (length1 == length2 && k1 == k2);
}
private:
const HarmonicBondForce& force;
};
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
vector > atoms(numBonds, vector(2));
params = CudaArray::create(cu, numBonds, "bondParams");
vector paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params->upload(paramVector);
map replacements;
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
cu.addForce(new CudaHarmonicBondForceInfo(force));
}
double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
if (numBonds != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
// Record the per-bond parameters.
vector paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
int atom1, atom2;
double length, k;
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomBondForceInfo : public CudaForceInfo {
public:
CudaCustomBondForceInfo(const CustomBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
vector parameters;
force.getBondParameters(index, particle1, particle2, parameters);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
vector parameters1, parameters2;
force.getBondParameters(group1, particle1, particle2, parameters1);
force.getBondParameters(group2, particle1, particle2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomBondForce& force;
};
CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
vector > atoms(numBonds, vector(2));
params = new CudaParameterSet(cu, force.getNumPerBondParameters(), numBonds, "customBondParams");
vector > paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
vector parameters;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomBondForceInfo(force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdR = "] = forceExpression;
// Create the kernels.
map variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< > functions;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
}
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
if (numBonds != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
// Record the per-bond parameters.
vector > paramVector(numBonds);
vector parameters;
for (int i = 0; i < numBonds; i++) {
int atom1, atom2;
force.getBondParameters(startIndex+i, atom1, atom2, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaHarmonicAngleForceInfo : public CudaForceInfo {
public:
CudaHarmonicAngleForceInfo(const HarmonicAngleForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(index, particle1, particle2, particle3, angle, k);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
double angle1, angle2, k1, k2;
force.getAngleParameters(group1, particle1, particle2, particle3, angle1, k1);
force.getAngleParameters(group2, particle1, particle2, particle3, angle2, k2);
return (angle1 == angle2 && k1 == k2);
}
private:
const HarmonicAngleForce& force;
};
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
vector > atoms(numAngles, vector(3));
params = CudaArray::create(cu, numAngles, "angleParams");
vector paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
double angle, k;
force.getAngleParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
}
params->upload(paramVector);
map replacements;
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
cu.addForce(new CudaHarmonicAngleForceInfo(force));
}
double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
if (numAngles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of angles has changed");
// Record the per-angle parameters.
vector paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
int atom1, atom2, atom3;
double angle, k;
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomAngleForceInfo : public CudaForceInfo {
public:
CudaCustomAngleForceInfo(const CustomAngleForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3;
vector parameters;
force.getAngleParameters(index, particle1, particle2, particle3, parameters);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
vector parameters1, parameters2;
force.getAngleParameters(group1, particle1, particle2, particle3, parameters1);
force.getAngleParameters(group2, particle1, particle2, particle3, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomAngleForce& force;
};
CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
vector > atoms(numAngles, vector(3));
params = new CudaParameterSet(cu, force.getNumPerAngleParameters(), numAngles, "customAngleParams");
vector > paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
vector parameters;
force.getAngleParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomAngleForceInfo(force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerAngleParameters(); i++) {
const string& name = force.getPerAngleParameterName(i);
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< > functions;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
}
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
if (numAngles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of angles has changed");
// Record the per-angle parameters.
vector > paramVector(numAngles);
vector parameters;
for (int i = 0; i < numAngles; i++) {
int atom1, atom2, atom3;
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaPeriodicTorsionForceInfo : public CudaForceInfo {
public:
CudaPeriodicTorsionForceInfo(const PeriodicTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, periodicity, phase, k);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4, periodicity1, periodicity2;
double phase1, phase2, k1, k2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, periodicity1, phase1, k1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, periodicity2, phase2, k2);
return (periodicity1 == periodicity2 && phase1 == phase2 && k1 == k2);
}
private:
const PeriodicTorsionForce& force;
};
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params = CudaArray::create(cu, numTorsions, "periodicTorsionParams");
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params->upload(paramVector);
map replacements;
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaPeriodicTorsionForceInfo(force));
}
double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
// Record the per-torsion parameters.
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4, periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaRBTorsionForceInfo : public CudaForceInfo {
public:
CudaRBTorsionForceInfo(const RBTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
double c0a, c0b, c1a, c1b, c2a, c2b, c3a, c3b, c4a, c4b, c5a, c5b;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, c0a, c1a, c2a, c3a, c4a, c5a);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, c0b, c1b, c2b, c3b, c4b, c5b);
return (c0a == c0b && c1a == c1b && c2a == c2b && c3a == c3b && c4a == c4b && c5a == c5b);
}
private:
const RBTorsionForce& force;
};
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
cu.setAsCurrent();
if (params1 != NULL)
delete params1;
if (params2 != NULL)
delete params2;
}
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params1 = CudaArray::create(cu, numTorsions, "rbTorsionParams1");
params2 = CudaArray::create(cu, numTorsions, "rbTorsionParams2");
vector paramVector1(numTorsions);
vector paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], c0, c1, c2, c3, c4, c5);
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1->upload(paramVector1);
params2->upload(paramVector2);
map replacements;
replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1->getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaRBTorsionForceInfo(force));
}
double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
// Record the per-torsion parameters.
vector paramVector1(numTorsions);
vector paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1->upload(paramVector1);
params2->upload(paramVector2);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCMAPTorsionForceInfo : public CudaForceInfo {
public:
CudaCMAPTorsionForceInfo(const CMAPTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int map, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(index, map, a1, a2, a3, a4, b1, b2, b3, b4);
particles.resize(8);
particles[0] = a1;
particles[1] = a2;
particles[2] = a3;
particles[3] = a4;
particles[4] = b1;
particles[5] = b2;
particles[6] = b3;
particles[7] = b4;
}
bool areGroupsIdentical(int group1, int group2) {
int map1, map2, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(group1, map1, a1, a2, a3, a4, b1, b2, b3, b4);
force.getTorsionParameters(group2, map2, a1, a2, a3, a4, b1, b2, b3, b4);
return (map1 == map2);
}
private:
const CMAPTorsionForce& force;
};
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
int numMaps = force.getNumMaps();
vector coeffVec;
vector mapPositionsVec(numMaps);
vector energy;
vector > c;
int currentPosition = 0;
for (int i = 0; i < numMaps; i++) {
int size;
force.getMapParameters(i, size, energy);
CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
mapPositionsVec[i] = make_int2(currentPosition, size);
currentPosition += 4*size*size;
for (int j = 0; j < size*size; j++) {
coeffVec.push_back(make_float4((float) c[j][0], (float) c[j][1], (float) c[j][2], (float) c[j][3]));
coeffVec.push_back(make_float4((float) c[j][4], (float) c[j][5], (float) c[j][6], (float) c[j][7]));
coeffVec.push_back(make_float4((float) c[j][8], (float) c[j][9], (float) c[j][10], (float) c[j][11]));
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
vector > atoms(numTorsions, vector(8));
vector torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients = CudaArray::create(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = CudaArray::create(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps = CudaArray::create(cu, numTorsions, "cmapTorsionMaps");
coefficients->upload(coeffVec);
mapPositions->upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec);
map replacements;
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients->getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions->getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps->getDevicePointer(), "int");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaCMAPTorsionForceInfo(force));
}
double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
class CudaCustomTorsionForceInfo : public CudaForceInfo {
public:
CudaCustomTorsionForceInfo(const CustomTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4;
vector parameters;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, parameters);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
vector parameters1, parameters2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, parameters1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomTorsionForce& force;
};
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params = new CudaParameterSet(cu, force.getNumPerTorsionParameters(), numTorsions, "customTorsionParams");
vector > paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
vector parameters;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomTorsionForceInfo(force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerTorsionParameters(); i++) {
const string& name = force.getPerTorsionParameterName(i);
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< > functions;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
}
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
// Record the per-torsion parameters.
vector > paramVector(numTorsions);
vector parameters;
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaNonbondedForceInfo : public CudaForceInfo {
public:
CudaNonbondedForceInfo(const NonbondedForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
int getNumParticleGroups() {
return force.getNumExceptions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
private:
const NonbondedForce& force;
};
CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
cu.setAsCurrent();
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
if (exceptionParams != NULL)
delete exceptionParams;
if (cosSinSums != NULL)
delete cosSinSums;
if (pmeGrid != NULL)
delete pmeGrid;
if (pmeBsplineModuliX != NULL)
delete pmeBsplineModuliX;
if (pmeBsplineModuliY != NULL)
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeBsplineTheta != NULL)
delete pmeBsplineTheta;
if (pmeBsplineDTheta != NULL)
delete pmeBsplineDTheta;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (sort != NULL)
delete sort;
if (hasInitializedFFT)
cufftDestroy(fft);
}
/**
* Select a size for an FFT that is a multiple of 2, 3, 5, and 7.
*/
static int findFFTDimension(int minimum) {
if (minimum < 1)
return 1;
while (true) {
// Attempt to factor the current value.
int unfactored = minimum;
for (int factor = 2; factor < 8; factor++) {
while (unfactored > 1 && unfactored%factor == 0)
unfactored /= factor;
}
if (unfactored == 1)
return minimum;
minimum++;
}
}
void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
cu.setAsCurrent();
// Identify which exceptions are 1-4 interactions.
vector > exclusions;
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0)
exceptions.push_back(i);
}
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
sigmaEpsilon = CudaArray::create(cu, numParticles, "sigmaEpsilon");
CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
vector sigmaEpsilonVector(numParticles);
vector > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
if (cu.getUseDoublePrecision())
posqd[i] = make_double4(0, 0, 0, charge);
else
posqf[i] = make_float4(0, 0, 0, (float) charge);
sigmaEpsilonVector[i] = make_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (int i = 0; i < (int) exclusions.size(); i++) {
exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first);
}
posq.upload(cu.getPinnedBuffer());
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
map defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
if (useCutoff) {
// Compute the reaction field constants.
double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
defines["REACTION_FIELD_K"] = cu.doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = cu.doubleToString(reactionFieldC);
}
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
alpha = 0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
// Create the reciprocal space kernels.
map replacements;
replacements["NUM_ATOMS"] = cu.intToString(numParticles);
replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
replacements["KMAX_X"] = cu.intToString(kmaxx);
replacements["KMAX_Y"] = cu.intToString(kmaxy);
replacements["KMAX_Z"] = cu.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
replacements["M_PI"] = cu.doubleToString(M_PI);
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements);
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters.
int gridSizeX, gridSizeY, gridSizeZ;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
gridSizeX = findFFTDimension(gridSizeX);
gridSizeY = findFFTDimension(gridSizeY);
gridSizeZ = findFFTDimension(gridSizeZ);
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["M_PI"] = cu.doubleToString(M_PI);
if (cu.getUseDoublePrecision())
pmeDefines["USE_DOUBLE_PRECISION"] = "1";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
pmeUpdateBsplinesKernel = cu.getKernel(module, "updateBsplines");
pmeAtomRangeKernel = cu.getKernel(module, "findAtomRangeForGrid");
pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
// Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
pmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
cu.addAutoclearBuffer(*pmeGrid);
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeBsplineTheta = new CudaArray(cu, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange = CudaArray::create(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create(cu, numParticles, "pmeAtomGridIndex");
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
cufftResult result = cufftPlan3d(&fft, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2Z : CUFFT_C2C);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
hasInitializedFFT = true;
// Initialize the b-spline moduli.
int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
vector data(PmeOrder);
vector ddata(PmeOrder);
vector bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
vector moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = sc*sc+ss*ss;
}
for (int i = 0; i < ndata; i++)
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
if (cu.getUseDoublePrecision()) {
if (dim == 0)
pmeBsplineModuliX->upload(moduli);
else if (dim == 1)
pmeBsplineModuliY->upload(moduli);
else
pmeBsplineModuliZ->upload(moduli);
}
else {
vector modulif(ndata);
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
pmeBsplineModuliX->upload(modulif);
else if (dim == 1)
pmeBsplineModuliY->upload(modulif);
else
pmeBsplineModuliZ->upload(modulif);
}
}
}
else
ewaldSelfEnergy = 0.0;
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon->getDevicePointer()));
// Initialize the exceptions.
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector > atoms(numExceptions, vector(2));
exceptionParams = CudaArray::create(cu, numExceptions, "exceptionParams");
vector exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams->upload(exceptionParamsVector);
map replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams->getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
cu.addForce(new CudaNonbondedForceInfo(force));
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
if (cosSinSums != NULL && cu.getContextIndex() == 0 && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums->getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums->getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums->getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
if (pmeGrid != NULL && cu.getContextIndex() == 0 && includeReciprocal) {
void* bsplinesArgs[] = {&cu.getPosq().getDevicePointer(), &pmeBsplineTheta->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
int bsplinesSharedSize = cu.ThreadBlockSize*PmeOrder*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
cu.executeKernel(pmeUpdateBsplinesKernel, bsplinesArgs, cu.getNumAtoms(), cu.ThreadBlockSize, bsplinesSharedSize);
sort->sort(*pmeAtomGridIndex);
void* rangeArgs[] = {&pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(), &cu.getPosq().getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeAtomRangeKernel, rangeArgs, cu.getNumAtoms());
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &pmeGrid->getDevicePointer(), &pmeBsplineTheta->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), PmeOrder*PmeOrder*PmeOrder);
void* finishSpreadArgs[] = {&pmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
void* convolutionArgs[] = {&pmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &pmeBsplineModuliX->getDevicePointer(),
&pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &pmeGrid->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms());
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
}
return energy;
}
void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
// Make sure the new parameters are acceptable.
cu.setAsCurrent();
if (force.getNumParticles() != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
if (!hasCoulomb || !hasLJ) {
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
if (!hasCoulomb && charge != 0.0)
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Coulomb interactions, because all charges were originally 0");
if (!hasLJ && epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0");
}
}
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()])
exceptions.push_back(i);
else if (chargeProd != 0.0 || epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
// Record the per-particle parameters.
CudaArray& posq = cu.getPosq();
posq.download(cu.getPinnedBuffer());
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
vector sigmaEpsilonVector(force.getNumParticles());
double sumSquaredCharges = 0.0;
const vector& order = cu.getAtomIndex();
for (int i = 0; i < force.getNumParticles(); i++) {
int index = order[i];
double charge, sigma, epsilon;
force.getParticleParameters(index, charge, sigma, epsilon);
if (cu.getUseDoublePrecision())
posqd[i].w = charge;
else
posqf[i].w = (float) charge;
sigmaEpsilonVector[index] = make_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
sumSquaredCharges += charge*charge;
}
posq.upload(cu.getPinnedBuffer());
sigmaEpsilon->upload(sigmaEpsilonVector);
// Record the exceptions.
if (numExceptions > 0) {
vector > atoms(numExceptions, vector(2));
vector exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
}
exceptionParams->upload(exceptionParamsVector);
}
// Compute other values.
NonbondedForce::NonbondedMethod method = force.getNonbondedMethod();
if (method == NonbondedForce::Ewald || method == NonbondedForce::PME)
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (method == NonbondedForce::CutoffPeriodic || method == NonbondedForce::Ewald || method == NonbondedForce::PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
}
class CudaCustomNonbondedForceInfo : public CudaForceInfo {
public:
CudaCustomNonbondedForceInfo(const CustomNonbondedForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector params1;
vector params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomNonbondedForce& force;
};
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
cu.setAsCurrent();
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+cu.intToString(forceIndex)+"_";
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
vector > paramVector(numParticles);
vector > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
vector parameters;
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
exclusionList[i].push_back(i);
}
for (int i = 0; i < force.getNumExclusions(); i++) {
int particle1, particle2;
force.getExclusionParticles(i, particle1, particle2);
exclusionList[particle1].push_back(particle2);
exclusionList[particle2].push_back(particle1);
}
params->setParameterValues(paramVector);
// Record the tabulated functions.
CudaExpressionUtilities::FunctionPlaceholder fp;
map functions;
vector > functionDefinitions;
vector tabulatedFunctionParamsVec(force.getNumFunctions());
for (int i = 0; i < force.getNumFunctions(); i++) {
string name;
vector values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
string arrayName = prefix+"table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = make_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(CudaArray::create(cu, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = CudaArray::create(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters");
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(float4), tabulatedFunctionParams->getDevicePointer()));
}
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map forceExpressions;
forceExpressions["tempEnergy += "] = energyExpression;
forceExpressions["tempForce -= "] = forceExpression;
// Create the kernels.
vector > variables;
ExpressionTreeNode rnode(new Operation::Variable("r"));
variables.push_back(make_pair(rnode, "r"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
variables.push_back(makeVariable(name, prefix+value));
}
stringstream compute;
compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
}
cu.addForce(new CudaCustomNonbondedForceInfo(force));
}
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
cu.setAsCurrent();
int numParticles = force.getNumParticles();
if (numParticles != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
// Record the per-particle parameters.
vector > paramVector(numParticles);
vector parameters;
for (int i = 0; i < numParticles; i++) {
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaGBSAOBCForceInfo : public CudaForceInfo {
public:
CudaGBSAOBCForceInfo(const GBSAOBCForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, radius1, radius2, scale1, scale2;
force.getParticleParameters(particle1, charge1, radius1, scale1);
force.getParticleParameters(particle2, charge2, radius2, scale2);
return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
}
private:
const GBSAOBCForce& force;
};
CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (bornSum != NULL)
delete bornSum;
if (bornRadii != NULL)
delete bornRadii;
if (bornForce != NULL)
delete bornForce;
if (obcChain != NULL)
delete obcChain;
}
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
params = CudaArray::create(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
if (cu.getUseDoublePrecision()) {
bornRadii = CudaArray::create(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain = CudaArray::create(cu, cu.getPaddedNumAtoms(), "obcChain");
}
else {
bornRadii = CudaArray::create(cu, cu.getPaddedNumAtoms(), "bornRadii");
obcChain = CudaArray::create(cu, cu.getPaddedNumAtoms(), "obcChain");
}
bornSum = CudaArray::create(cu, cu.getPaddedNumAtoms(), "bornSum");
bornForce = CudaArray::create(cu, cu.getPaddedNumAtoms(), "bornForce");
cu.addAutoclearBuffer(*bornSum);
cu.addAutoclearBuffer(*bornForce);
CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
vector paramsVector(cu.getPaddedNumAtoms());
const double dielectricOffset = 0.009;
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
radius -= dielectricOffset;
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
if (cu.getUseDoublePrecision())
posqd[i] = make_double4(0, 0, 0, charge);
else
posqf[i] = make_float4(0, 0, 0, (float) charge);
}
posq.upload(cu.getPinnedBuffer());
params->upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
string source = CudaKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));;
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));;
cu.addForce(new CudaGBSAOBCForceInfo(force));
}
double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
if (!hasCreatedKernels) {
// These Kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized yet then.
hasCreatedKernels = true;
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
map defines;
if (nb.getUseCutoff())
defines["USE_CUTOFF"] = "1";
if (nb.getUsePeriodic())
defines["USE_PERIODIC"] = "1";
if (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision())
defines["ENABLE_SHUFFLE"] = "1";
defines["CUTOFF_SQUARED"] = cu.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["PREFACTOR"] = cu.doubleToString(prefactor);
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["FORCE_WORK_GROUP_SIZE"] = cu.intToString(nb.getForceThreadBlockSize());
map replacements;
stringstream defineAccum;
if (cu.getAccumulateInDouble()) {
defineAccum << "typedef double accum;\n";
defineAccum << "typedef double4 accum4;\n";
defines["make_accum4"] = "make_double4";
}
else {
defineAccum << "typedef real accum;\n";
defineAccum << "typedef real4 accum4;\n";
defines["make_accum4"] = "make_real4";
}
replacements["DEFINE_ACCUM"] = defineAccum.str();
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines);
computeBornSumKernel = cu.getKernel(module, "computeBornSum");
computeSumArgs.push_back(&bornSum->getDevicePointer());
computeSumArgs.push_back(&cu.getPosq().getDevicePointer());
computeSumArgs.push_back(¶ms->getDevicePointer());
if (nb.getUseCutoff()) {
computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
computeSumArgs.push_back(cu.getPeriodicBoxSizePointer());
computeSumArgs.push_back(cu.getInvPeriodicBoxSizePointer());
computeSumArgs.push_back(&maxTiles);
computeSumArgs.push_back(&nb.getInteractionFlags().getDevicePointer());
}
else
computeSumArgs.push_back(&maxTiles);
computeSumArgs.push_back(&nb.getExclusionIndices().getDevicePointer());
computeSumArgs.push_back(&nb.getExclusionRowIndices().getDevicePointer());
force1Kernel = cu.getKernel(module, "computeGBSAForce1");
force1Args.push_back(&cu.getForce().getDevicePointer());
force1Args.push_back(&bornForce->getDevicePointer());
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&bornRadii->getDevicePointer());
if (nb.getUseCutoff()) {
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
force1Args.push_back(&nb.getInteractionCount().getDevicePointer());
force1Args.push_back(cu.getPeriodicBoxSizePointer());
force1Args.push_back(cu.getInvPeriodicBoxSizePointer());
force1Args.push_back(&maxTiles);
force1Args.push_back(&nb.getInteractionFlags().getDevicePointer());
}
else
force1Args.push_back(&maxTiles);
force1Args.push_back(&nb.getExclusionIndices().getDevicePointer());
force1Args.push_back(&nb.getExclusionRowIndices().getDevicePointer());
reduceBornSumKernel = cu.getKernel(module, "reduceBornSum");
reduceBornForceKernel = cu.getKernel(module, "reduceBornForce");
}
if (nb.getUseCutoff()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
computeSumArgs[3] = &nb.getInteractingTiles().getDevicePointer();
force1Args[5] = &nb.getInteractingTiles().getDevicePointer();
computeSumArgs[8] = &nb.getInteractionFlags().getDevicePointer();
force1Args[10] = &nb.getInteractionFlags().getDevicePointer();
}
}
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
float alpha = 1.0f, beta = 0.8f, gamma = 4.85f;
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum->getDevicePointer(), ¶ms->getDevicePointer(),
&bornRadii->getDevicePointer(), &obcChain->getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms());
cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
void* reduceForceArgs[] = {&bornForce->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), ¶ms->getDevicePointer(),
&bornRadii->getDevicePointer(), &obcChain->getDevicePointer()};
cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms());
return 0.0;
}
void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force) {
// Make sure the new parameters are acceptable.
cu.setAsCurrent();
int numParticles = force.getNumParticles();
if (numParticles != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
// Record the per-particle parameters.
CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
posq.download(cu.getPinnedBuffer());
vector paramsVector(cu.getPaddedNumAtoms());
const double dielectricOffset = 0.009;
for (int i = 0; i < numParticles; i++) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
radius -= dielectricOffset;
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
if (cu.getUseDoublePrecision())
posqd[i].w = charge;
else
posqf[i].w = (float) charge;
}
posq.upload(cu.getPinnedBuffer());
params->upload(paramsVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomGBForceInfo : public CudaForceInfo {
public:
CudaCustomGBForceInfo(const CustomGBForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector params1;
vector params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomGBForce& force;
};
CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (computedValues != NULL)
delete computedValues;
if (energyDerivs != NULL)
delete energyDerivs;
if (longEnergyDerivs != NULL)
delete longEnergyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("CustomGBForce does not support using multiple CUDA devices");
bool useExclusionsForValue = false;
numComputedValues = force.getNumComputedValues();
vector computedValueNames(force.getNumComputedValues());
vector computedValueExpressions(force.getNumComputedValues());
if (force.getNumComputedValues() > 0) {
CustomGBForce::ComputationType type;
force.getComputedValueParameters(0, computedValueNames[0], computedValueExpressions[0], type);
if (type == CustomGBForce::SingleParticle)
throw OpenMMException("CudaPlatform requires that the first computed value for a CustomGBForce be of type ParticlePair or ParticlePairNoExclusions.");
useExclusionsForValue = (type == CustomGBForce::ParticlePair);
for (int i = 1; i < force.getNumComputedValues(); i++) {
force.getComputedValueParameters(i, computedValueNames[i], computedValueExpressions[i], type);
if (type != CustomGBForce::SingleParticle)
throw OpenMMException("CudaPlatform requires that a CustomGBForce only have one computed value of type ParticlePair or ParticlePairNoExclusions.");
}
}
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+cu.intToString(forceIndex)+"_";
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customGBParameters", true);
computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), numParticles, "customGBComputedValues", true);
if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create(cu, force.getNumGlobalParameters(), "customGBGlobals");
vector > paramVector(numParticles);
vector > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
vector parameters;
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
exclusionList[i].push_back(i);
}
for (int i = 0; i < force.getNumExclusions(); i++) {
int particle1, particle2;
force.getExclusionParticles(i, particle1, particle2);
exclusionList[particle1].push_back(particle2);
exclusionList[particle2].push_back(particle1);
}
params->setParameterValues(paramVector);
// Record the tabulated functions.
CudaExpressionUtilities::FunctionPlaceholder fp;
map functions;
vector > functionDefinitions;
vector tabulatedFunctionParamsVec(force.getNumFunctions());
stringstream tableArgs;
for (int i = 0; i < force.getNumFunctions(); i++) {
string name;
vector values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
string arrayName = prefix+"table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = make_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(CudaArray::create(cu, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
tableArgs << ", const float4* __restrict__ " << arrayName;
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = CudaArray::create(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters");
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(float4), tabulatedFunctionParams->getDevicePointer()));
tableArgs << ", const float4* " << prefix << "functionParams";
}
// Record the global parameters.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms.
vector > valueGradientExpressions(force.getNumComputedValues());
vector > valueDerivExpressions(force.getNumComputedValues());
needParameterGradient = false;
for (int i = 1; i < force.getNumComputedValues(); i++) {
Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
valueGradientExpressions[i].push_back(ex.differentiate("x").optimize());
valueGradientExpressions[i].push_back(ex.differentiate("y").optimize());
valueGradientExpressions[i].push_back(ex.differentiate("z").optimize());
if (!isZeroExpression(valueGradientExpressions[i][0]) || !isZeroExpression(valueGradientExpressions[i][1]) || !isZeroExpression(valueGradientExpressions[i][2]))
needParameterGradient = true;
for (int j = 0; j < i; j++)
valueDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize());
}
vector > energyDerivExpressions(force.getNumEnergyTerms());
vector needChainForValue(force.getNumComputedValues(), false);
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
Lepton::ParsedExpression ex = Lepton::Parser::parse(expression, functions).optimize();
for (int j = 0; j < force.getNumComputedValues(); j++) {
if (type == CustomGBForce::SingleParticle) {
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize());
if (!isZeroExpression(energyDerivExpressions[i].back()))
needChainForValue[j] = true;
}
else {
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"1").optimize());
if (!isZeroExpression(energyDerivExpressions[i].back()))
needChainForValue[j] = true;
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"2").optimize());
if (!isZeroExpression(energyDerivExpressions[i].back()))
needChainForValue[j] = true;
}
}
}
longEnergyDerivs = CudaArray::create(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
// Create the kernels.
bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic);
{
// Create the N2 value kernel.
vector > variables;
map rename;
ExpressionTreeNode rnode(new Operation::Variable("r"));
variables.push_back(make_pair(rnode, "r"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables.push_back(makeVariable(name+"1", "params"+params->getParameterSuffix(i, "1")));
variables.push_back(makeVariable(name+"2", "params"+params->getParameterSuffix(i, "2")));
rename[name+"1"] = name+"2";
rename[name+"2"] = name+"1";
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
variables.push_back(makeVariable(name, value));
}
map n2ValueExpressions;
stringstream n2ValueSource;
Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[0], functions).optimize();
n2ValueExpressions["tempValue1 = "] = ex;
n2ValueExpressions["tempValue2 = "] = ex.renameVariables(rename);
n2ValueSource << cu.getExpressionUtilities().createExpressions(n2ValueExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
map replacements;
string n2ValueStr = n2ValueSource.str();
replacements["COMPUTE_VALUE"] = n2ValueStr;
stringstream extraArgs, atomParams, loadLocal1, loadLocal2, load1, load2;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals";
pairValueUsesParam.resize(params->getBuffers().size(), false);
int atomParamSize = 6;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+cu.intToString(i+1);
if (n2ValueStr.find(paramName+"1") != n2ValueStr.npos || n2ValueStr.find(paramName+"2") != n2ValueStr.npos) {
extraArgs << ", const " << buffer.getType() << "* __restrict__ global_" << paramName;
atomParams << buffer.getType() << " " << paramName << ";\n";
loadLocal1 << "localData[localAtomIndex]." << paramName << " = " << paramName << "1;\n";
loadLocal2 << "localData[localAtomIndex]." << paramName << " = global_" << paramName << "[j];\n";
load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
load2 << buffer.getType() << " " << paramName << "2 = localData[atom2]." << paramName << ";\n";
pairValueUsesParam[i] = true;
atomParamSize += buffer.getNumComponents();
}
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["ATOM_PARAMETER_DATA"] = atomParams.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
map defines;
if (useCutoff)
defines["USE_CUTOFF"] = "1";
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
if (useExclusionsForValue)
defines["USE_EXCLUSIONS"] = "1";
if (atomParamSize%2 == 0 && !cu.getUseDoublePrecision())
defines["NEED_PADDING"] = "1";
defines["WARPS_PER_GROUP"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()/CudaContext::TileSize);
defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customGBValueN2, replacements), defines);
pairValueKernel = cu.getKernel(module, "computeN2Value");
if (useExclusionsForValue)
cu.getNonbondedUtilities().requestExclusions(exclusionList);
}
{
// Create the kernel to reduce the N2 value and calculate other values.
stringstream reductionSource, extraArgs;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+cu.intToString(i+1);
extraArgs << ", const " << buffer.getType() << "* __restrict__ " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+cu.intToString(i+1);
extraArgs << ", " << buffer.getType() << "* __restrict__ global_" << valueName;
reductionSource << buffer.getType() << " local_" << valueName << ";\n";
}
reductionSource << "local_values" << computedValues->getParameterSuffix(0) << " = sum;\n";
map variables;
variables["x"] = "pos.x";
variables["y"] = "pos.y";
variables["z"] = "pos.z";
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 1; i < force.getNumComputedValues(); i++) {
variables[computedValueNames[i-1]] = "local_values"+computedValues->getParameterSuffix(i-1);
map valueExpressions;
valueExpressions["local_values"+computedValues->getParameterSuffix(i)+" = "] = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
reductionSource << cu.getExpressionUtilities().createExpressions(valueExpressions, variables, functionDefinitions, "value"+cu.intToString(i)+"_temp", prefix+"functionParams");
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
string valueName = "values"+cu.intToString(i+1);
reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n";
}
map replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_VALUES"] = reductionSource.str();
map defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBValuePerParticle, replacements), defines);
perParticleValueKernel = cu.getKernel(module, "computePerParticleValues");
}
{
// Create the N2 energy kernel.
vector > variables;
ExpressionTreeNode rnode(new Operation::Variable("r"));
variables.push_back(make_pair(rnode, "r"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables.push_back(makeVariable(name+"1", "params"+params->getParameterSuffix(i, "1")));
variables.push_back(makeVariable(name+"2", "params"+params->getParameterSuffix(i, "2")));
}
for (int i = 0; i < force.getNumComputedValues(); i++) {
variables.push_back(makeVariable(computedValueNames[i]+"1", "values"+computedValues->getParameterSuffix(i, "1")));
variables.push_back(makeVariable(computedValueNames[i]+"2", "values"+computedValues->getParameterSuffix(i, "2")));
}
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables.push_back(makeVariable(force.getGlobalParameterName(i), "globals["+cu.intToString(i)+"]"));
stringstream n2EnergySource;
bool anyExclusions = (force.getNumExclusions() > 0);
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
if (type == CustomGBForce::SingleParticle)
continue;
bool exclude = (anyExclusions && type == CustomGBForce::ParticlePair);
map n2EnergyExpressions;
n2EnergyExpressions["tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize();
n2EnergyExpressions["dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize();
for (int j = 0; j < force.getNumComputedValues(); j++) {
if (needChainForValue[j]) {
string index = cu.intToString(j+1);
n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+index+"_1 += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+cu.intToString(i+1)+"*/ deriv"+index+"_2 += "] = energyDerivExpressions[i][2*j+1];
}
}
if (exclude)
n2EnergySource << "if (!isExcluded) {\n";
n2EnergySource << cu.getExpressionUtilities().createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
if (exclude)
n2EnergySource << "}\n";
}
map replacements;
string n2EnergyStr = n2EnergySource.str();
replacements["COMPUTE_INTERACTION"] = n2EnergyStr;
stringstream extraArgs, atomParams, loadLocal1, loadLocal2, clearLocal, load1, load2, declare1, recordDeriv, storeDerivs1, storeDerivs2;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals";
pairEnergyUsesParam.resize(params->getBuffers().size(), false);
int atomParamSize = 7;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+cu.intToString(i+1);
if (n2EnergyStr.find(paramName+"1") != n2EnergyStr.npos || n2EnergyStr.find(paramName+"2") != n2EnergyStr.npos) {
extraArgs << ", const " << buffer.getType() << "* __restrict__ global_" << paramName;
atomParams << buffer.getType() << " " << paramName << ";\n";
loadLocal1 << "localData[localAtomIndex]." << paramName << " = " << paramName << "1;\n";
loadLocal2 << "localData[localAtomIndex]." << paramName << " = global_" << paramName << "[j];\n";
load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
load2 << buffer.getType() << " " << paramName << "2 = localData[atom2]." << paramName << ";\n";
pairEnergyUsesParam[i] = true;
atomParamSize += buffer.getNumComponents();
}
}
pairEnergyUsesValue.resize(computedValues->getBuffers().size(), false);
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+cu.intToString(i+1);
if (n2EnergyStr.find(valueName+"1") != n2EnergyStr.npos || n2EnergyStr.find(valueName+"2") != n2EnergyStr.npos) {
extraArgs << ", const " << buffer.getType() << "* __restrict__ global_" << valueName;
atomParams << buffer.getType() << " " << valueName << ";\n";
loadLocal1 << "localData[localAtomIndex]." << valueName << " = " << valueName << "1;\n";
loadLocal2 << "localData[localAtomIndex]." << valueName << " = global_" << valueName << "[j];\n";
load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n";
load2 << buffer.getType() << " " << valueName << "2 = localData[atom2]." << valueName << ";\n";
pairEnergyUsesValue[i] = true;
atomParamSize += buffer.getNumComponents();
}
}
extraArgs << ", unsigned long long* __restrict__ derivBuffers";
for (int i = 0; i < force.getNumComputedValues(); i++) {
string index = cu.intToString(i+1);
atomParams << "accum deriv" << index << ";\n";
clearLocal << "localData[localAtomIndex].deriv" << index << " = 0;\n";
declare1 << "accum deriv" << index << "_1 = 0;\n";
load2 << "real deriv" << index << "_2 = 0;\n";
recordDeriv << "localData[atom2].deriv" << index << " += deriv" << index << "_2;\n";
storeDerivs1 << "STORE_DERIVATIVE_1(" << index << ")\n";
storeDerivs2 << "STORE_DERIVATIVE_2(" << index << ")\n";
atomParamSize++;
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["ATOM_PARAMETER_DATA"] = atomParams.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["CLEAR_LOCAL_DERIVATIVES"] = clearLocal.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
replacements["DECLARE_ATOM1_DERIVATIVES"] = declare1.str();
replacements["RECORD_DERIVATIVE_2"] = recordDeriv.str();
replacements["STORE_DERIVATIVES_1"] = storeDerivs1.str();
replacements["STORE_DERIVATIVES_2"] = storeDerivs2.str();
map defines;
stringstream defineAccum;
if (cu.getAccumulateInDouble()) {
defineAccum << "typedef double accum;\n";
defineAccum << "typedef double3 accum3;\n";
defines["make_accum3"] = "make_double3";
}
else {
defineAccum << "typedef real accum;\n";
defineAccum << "typedef real3 accum3;\n";
defines["make_accum3"] = "make_real3";
}
replacements["DEFINE_ACCUM"] = defineAccum.str();
if (useCutoff)
defines["USE_CUTOFF"] = "1";
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
if (anyExclusions)
defines["USE_EXCLUSIONS"] = "1";
if (atomParamSize%2 == 0 && !cu.getUseDoublePrecision())
defines["NEED_PADDING"] = "1";
defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
defines["WARPS_PER_GROUP"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()/CudaContext::TileSize);
defines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customGBEnergyN2, replacements), defines);
pairEnergyKernel = cu.getKernel(module, "computeN2Energy");
}
{
// Create the kernel to reduce the derivatives and calculate per-particle energy terms.
stringstream compute, extraArgs, load;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+cu.intToString(i+1);
extraArgs << ", const " << buffer.getType() << "* __restrict__ " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+cu.intToString(i+1);
extraArgs << ", const " << buffer.getType() << "* __restrict__ " << valueName;
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = cu.intToString(i+1);
extraArgs << ", " << buffer.getType() << "* __restrict__ derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
}
extraArgs << ", const long long* __restrict__ derivBuffersIn";
for (int i = 0; i < energyDerivs->getNumParameters(); ++i)
load << "derivBuffers" << energyDerivs->getParameterSuffix(i, "[index]") <<
" = RECIP(0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*" << cu.intToString(i) << "];\n";
// Compute the various expressions.
map variables;
variables["x"] = "pos.x";
variables["y"] = "pos.y";
variables["z"] = "pos.z";
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
map expressions;
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
if (type != CustomGBForce::SingleParticle)
continue;
Lepton::ParsedExpression parsed = Lepton::Parser::parse(expression, functions).optimize();
expressions["/*"+cu.intToString(i+1)+"*/ energy += "] = parsed;
for (int j = 0; j < force.getNumComputedValues(); j++)
expressions["/*"+cu.intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j)+" += "] = energyDerivExpressions[i][j];
Lepton::ParsedExpression gradx = parsed.differentiate("x").optimize();
Lepton::ParsedExpression grady = parsed.differentiate("y").optimize();
Lepton::ParsedExpression gradz = parsed.differentiate("z").optimize();
if (!isZeroExpression(gradx))
expressions["/*"+cu.intToString(i+1)+"*/ force.x -= "] = gradx;
if (!isZeroExpression(grady))
expressions["/*"+cu.intToString(i+1)+"*/ force.y -= "] = grady;
if (!isZeroExpression(gradz))
expressions["/*"+cu.intToString(i+1)+"*/ force.z -= "] = gradz;
}
for (int i = 1; i < force.getNumComputedValues(); i++)
for (int j = 0; j < i; j++)
expressions["real dV"+cu.intToString(i)+"dV"+cu.intToString(j)+" = "] = valueDerivExpressions[i][j];
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// Record values.
compute << "forceBuffers[index] += (long long) (force.x*0xFFFFFFFF);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS] += (long long) (force.y*0xFFFFFFFF);\n";
compute << "forceBuffers[index+PADDED_NUM_ATOMS*2] += (long long) (force.z*0xFFFFFFFF);\n";
for (int i = 1; i < force.getNumComputedValues(); i++) {
compute << "real totalDeriv"<getBuffers().size(); i++) {
string index = cu.intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
}
map replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["LOAD_DERIVATIVES"] = load.str();
replacements["COMPUTE_ENERGY"] = compute.str();
map defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::customGBEnergyPerParticle, replacements), defines);
perParticleEnergyKernel = cu.getKernel(module, "computePerParticleEnergy");
}
if (needParameterGradient) {
// Create the kernel to compute chain rule terms for computed values that depend explicitly on particle coordinates.
stringstream compute, extraArgs;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", const float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+cu.intToString(i+1);
extraArgs << ", const " << buffer.getType() << "* __restrict__ " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+cu.intToString(i+1);
extraArgs << ", const " << buffer.getType() << "* __restrict__ " << valueName;
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = cu.intToString(i+1);
extraArgs << ", " << buffer.getType() << "* __restrict__ derivBuffers" << index;
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
}
map variables;
variables["x"] = "pos.x";
variables["y"] = "pos.y";
variables["z"] = "pos.z";
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+cu.intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
for (int i = 1; i < force.getNumComputedValues(); i++) {
string is = cu.intToString(i);
compute << "real3 dV"< derivExpressions;
string js = cu.intToString(j);
derivExpressions["real dV"+is+"dV"+js+" = "] = valueDerivExpressions[i][j];
compute << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionDefinitions, "temp_"+is+"_"+js, prefix+"functionParams");
compute << "dV"< gradientExpressions;
if (!isZeroExpression(valueGradientExpressions[i][0]))
gradientExpressions["dV"+is+"dR.x += "] = valueGradientExpressions[i][0];
if (!isZeroExpression(valueGradientExpressions[i][1]))
gradientExpressions["dV"+is+"dR.y += "] = valueGradientExpressions[i][1];
if (!isZeroExpression(valueGradientExpressions[i][2]))
gradientExpressions["dV"+is+"dR.z += "] = valueGradientExpressions[i][2];
compute << cu.getExpressionUtilities().createExpressions(gradientExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
}
for (int i = 1; i < force.getNumComputedValues(); i++) {
string is = cu.intToString(i);
compute << "force -= deriv"<getParameterSuffix(i)<<"*dV"< replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_FORCES"] = compute.str();
map defines;
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customGBGradientChainRule, replacements), defines);
gradientChainRuleKernel = cu.getKernel(module, "computeGradientChainRuleTerms");
}
{
// Create the code to calculate chain rules terms as part of the default nonbonded kernel.
vector > globalVariables;
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
globalVariables.push_back(makeVariable(name, prefix+value));
}
vector > variables = globalVariables;
map rename;
ExpressionTreeNode rnode(new Operation::Variable("r"));
variables.push_back(make_pair(rnode, "r"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
rename[name+"1"] = name+"2";
rename[name+"2"] = name+"1";
}
map derivExpressions;
stringstream chainSource;
Lepton::ParsedExpression dVdR = Lepton::Parser::parse(computedValueExpressions[0], functions).differentiate("r").optimize();
derivExpressions["real dV0dR1 = "] = dVdR;
derivExpressions["real dV0dR2 = "] = dVdR.renameVariables(rename);
chainSource << cu.getExpressionUtilities().createExpressions(derivExpressions, variables, functionDefinitions, prefix+"temp0_", prefix+"functionParams");
if (needChainForValue[0]) {
if (useExclusionsForValue)
chainSource << "if (!isExcluded) {\n";
chainSource << "tempForce -= dV0dR1*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(0, "1") << ";\n";
chainSource << "tempForce -= dV0dR2*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(0, "2") << ";\n";
if (useExclusionsForValue)
chainSource << "}\n";
}
for (int i = 1; i < force.getNumComputedValues(); i++) {
if (needChainForValue[i]) {
chainSource << "tempForce -= dV0dR1*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(i, "1") << ";\n";
chainSource << "tempForce -= dV0dR2*" << prefix << "dEdV" << energyDerivs->getParameterSuffix(i, "2") << ";\n";
}
}
map replacements;
string chainStr = chainSource.str();
replacements["COMPUTE_FORCE"] = chainStr;
string source = cu.replaceStrings(CudaKernelSources::customGBChainRule, replacements);
vector parameters;
vector arguments;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = prefix+"params"+cu.intToString(i+1);
if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos)
parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string paramName = prefix+"values"+cu.intToString(i+1);
if (chainStr.find(paramName+"1") != chainStr.npos || chainStr.find(paramName+"2") != chainStr.npos)
parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
if (needChainForValue[i]) {
CudaNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string paramName = prefix+"dEdV"+cu.intToString(i+1);
parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
}
if (globals != NULL) {
globals->upload(globalParamValues);
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
}
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) parameters.size(); i++)
cu.getNonbondedUtilities().addParameter(parameters[i]);
for (int i = 0; i < (int) arguments.size(); i++)
cu.getNonbondedUtilities().addArgument(arguments[i]);
}
cu.addForce(new CudaCustomGBForceInfo(force));
cu.addAutoclearBuffer(*longEnergyDerivs);
}
double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
valueBuffers = CudaArray::create(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
cu.addAutoclearBuffer(*valueBuffers);
cu.clearBuffer(valueBuffers->getDevicePointer(), sizeof(long long)*valueBuffers->getSize());
pairValueArgs.push_back(&cu.getPosq().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionIndices().getDevicePointer());
pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionRowIndices().getDevicePointer());
pairValueArgs.push_back(&valueBuffers->getDevicePointer());
if (nb.getUseCutoff()) {
pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
pairValueArgs.push_back(cu.getPeriodicBoxSizePointer());
pairValueArgs.push_back(cu.getInvPeriodicBoxSizePointer());
pairValueArgs.push_back(&maxTiles);
pairValueArgs.push_back(&nb.getInteractionFlags().getDevicePointer());
}
else
pairValueArgs.push_back(&maxTiles);
if (globals != NULL)
pairValueArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairValueUsesParam[i])
pairValueArgs.push_back(¶ms->getBuffers()[i].getMemory());
}
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairValueArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
pairValueArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
}
perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
perParticleValueArgs.push_back(&valueBuffers->getDevicePointer());
if (globals != NULL)
perParticleValueArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
perParticleValueArgs.push_back(¶ms->getBuffers()[i].getMemory());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleValueArgs.push_back(&computedValues->getBuffers()[i].getMemory());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleValueArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
perParticleValueArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
}
pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusionIndices().getDevicePointer());
pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusionRowIndices().getDevicePointer());
if (nb.getUseCutoff()) {
pairEnergyArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairEnergyArgs.push_back(&nb.getInteractionCount().getDevicePointer());
pairEnergyArgs.push_back(cu.getPeriodicBoxSizePointer());
pairEnergyArgs.push_back(cu.getInvPeriodicBoxSizePointer());
pairEnergyArgs.push_back(&maxTiles);
pairEnergyArgs.push_back(&nb.getInteractionFlags().getDevicePointer());
}
else
pairEnergyArgs.push_back(&maxTiles);
if (globals != NULL)
pairEnergyArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if (pairEnergyUsesParam[i])
pairEnergyArgs.push_back(¶ms->getBuffers()[i].getMemory());
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
if (pairEnergyUsesValue[i])
pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
}
pairEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairEnergyArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
pairEnergyArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
}
perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals != NULL)
perParticleEnergyArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
perParticleEnergyArgs.push_back(¶ms->getBuffers()[i].getMemory());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
perParticleEnergyArgs.push_back(&energyDerivs->getBuffers()[i].getMemory());
perParticleEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleEnergyArgs.push_back(&tabulatedFunctions[i]->getDevicePointer());
perParticleEnergyArgs.push_back(&tabulatedFunctionParams->getDevicePointer());
}
if (needParameterGradient) {
gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
if (globals != NULL)
gradientChainRuleArgs.push_back(&globals->getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
gradientChainRuleArgs.push_back(¶ms->getBuffers()[i].getMemory());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
gradientChainRuleArgs.push_back(&computedValues->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
gradientChainRuleArgs.push_back(&energyDerivs->getBuffers()[i].getMemory());
}
}
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
if (nb.getUseCutoff()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
pairValueArgs[5] = &nb.getInteractingTiles().getDevicePointer();
pairEnergyArgs[6] = &nb.getInteractingTiles().getDevicePointer();
pairValueArgs[10] = &nb.getInteractionFlags().getDevicePointer();
pairEnergyArgs[11] = &nb.getInteractionFlags().getDevicePointer();
}
}
cu.executeKernel(pairValueKernel, &pairValueArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cu.executeKernel(perParticleValueKernel, &perParticleValueArgs[0], cu.getPaddedNumAtoms());
cu.executeKernel(pairEnergyKernel, &pairEnergyArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cu.executeKernel(perParticleEnergyKernel, &perParticleEnergyArgs[0], cu.getPaddedNumAtoms());
if (needParameterGradient)
cu.executeKernel(gradientChainRuleKernel, &gradientChainRuleArgs[0], cu.getPaddedNumAtoms());
return 0.0;
}
void CudaCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, const CustomGBForce& force) {
cu.setAsCurrent();
int numParticles = force.getNumParticles();
if (numParticles != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
// Record the per-particle parameters.
vector > paramVector(numParticles);
vector parameters;
for (int i = 0; i < numParticles; i++) {
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomExternalForceInfo : public CudaForceInfo {
public:
CudaCustomExternalForceInfo(const CustomExternalForce& force, int numParticles) : force(force), indices(numParticles, -1) {
vector params;
for (int i = 0; i < force.getNumParticles(); i++) {
int particle;
force.getParticleParameters(i, particle, params);
indices[particle] = i;
}
}
bool areParticlesIdentical(int particle1, int particle2) {
particle1 = indices[particle1];
particle2 = indices[particle2];
if (particle1 == -1 && particle2 == -1)
return true;
if (particle1 == -1 || particle2 == -1)
return false;
int temp;
vector params1;
vector params2;
force.getParticleParameters(particle1, temp, params1);
force.getParticleParameters(particle2, temp, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
private:
const CustomExternalForce& force;
vector indices;
};
CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumParticles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts;
numParticles = endIndex-startIndex;
if (numParticles == 0)
return;
vector > atoms(numParticles, vector(1));
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customExternalParams");
vector > paramVector(numParticles);
for (int i = 0; i < numParticles; i++) {
vector parameters;
force.getParticleParameters(startIndex+i, atoms[i][0], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomExternalForceInfo(force, system.getNumParticles()));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x").optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y").optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdX = "] = forceExpressionX;
expressions["float dEdY = "] = forceExpressionY;
expressions["float dEdZ = "] = forceExpressionZ;
// Create the kernels.
map variables;
variables["x"] = "pos1.x";
variables["y"] = "pos1.y";
variables["z"] = "pos1.z";
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables[name] = "particleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create(cu, force.getNumGlobalParameters(), "customExternalGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< > functions;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::customExternalForce, replacements), force.getForceGroup());
}
double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& context, const CustomExternalForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumParticles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts;
if (numParticles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of particles has changed");
// Record the per-particle parameters.
vector > paramVector(numParticles);
vector parameters;
for (int i = 0; i < numParticles; i++) {
int particle;
force.getParticleParameters(startIndex+i, particle, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomHbondForceInfo : public CudaForceInfo {
public:
CudaCustomHbondForceInfo(const CustomHbondForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
return true;
}
int getNumParticleGroups() {
return force.getNumDonors()+force.getNumAcceptors()+force.getNumExclusions();
}
void getParticlesInGroup(int index, vector& particles) {
int p1, p2, p3;
vector parameters;
if (index < force.getNumDonors()) {
force.getDonorParameters(index, p1, p2, p3, parameters);
particles.clear();
particles.push_back(p1);
if (p2 > -1)
particles.push_back(p2);
if (p3 > -1)
particles.push_back(p3);
return;
}
index -= force.getNumDonors();
if (index < force.getNumAcceptors()) {
force.getAcceptorParameters(index, p1, p2, p3, parameters);
particles.clear();
particles.push_back(p1);
if (p2 > -1)
particles.push_back(p2);
if (p3 > -1)
particles.push_back(p3);
return;
}
index -= force.getNumAcceptors();
int donor, acceptor;
force.getExclusionParticles(index, donor, acceptor);
particles.clear();
force.getDonorParameters(donor, p1, p2, p3, parameters);
particles.push_back(p1);
if (p2 > -1)
particles.push_back(p2);
if (p3 > -1)
particles.push_back(p3);
force.getAcceptorParameters(acceptor, p1, p2, p3, parameters);
particles.push_back(p1);
if (p2 > -1)
particles.push_back(p2);
if (p3 > -1)
particles.push_back(p3);
}
bool areGroupsIdentical(int group1, int group2) {
int p1, p2, p3;
vector params1, params2;
if (group1 < force.getNumDonors() && group2 < force.getNumDonors()) {
force.getDonorParameters(group1, p1, p2, p3, params1);
force.getDonorParameters(group2, p1, p2, p3, params2);
return (params1 == params2 && params1 == params2);
}
if (group1 < force.getNumDonors() || group2 < force.getNumDonors())
return false;
group1 -= force.getNumDonors();
group2 -= force.getNumDonors();
if (group1 < force.getNumAcceptors() && group2 < force.getNumAcceptors()) {
force.getAcceptorParameters(group1, p1, p2, p3, params1);
force.getAcceptorParameters(group2, p1, p2, p3, params2);
return (params1 == params2 && params1 == params2);
}
if (group1 < force.getNumAcceptors() || group2 < force.getNumAcceptors())
return false;
return true;
}
private:
const CustomHbondForce& force;
};
CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
cu.setAsCurrent();
if (donorParams != NULL)
delete donorParams;
if (acceptorParams != NULL)
delete acceptorParams;
if (donors != NULL)
delete donors;
if (acceptors != NULL)
delete acceptors;
if (globals != NULL)
delete globals;
if (donorExclusions != NULL)
delete donorExclusions;
if (acceptorExclusions != NULL)
delete acceptorExclusions;
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
computeDonor << value;
computeAcceptor << value;
}
static void applyDonorAndAcceptorForces(stringstream& applyToDonor, stringstream& applyToAcceptor, int atom, const string& value) {
string forceNames[] = {"f1", "f2", "f3"};
if (atom < 3)
applyToAcceptor << forceNames[atom]<<" += trim("<(cu, numDonors, "customHbondDonors");
acceptors = CudaArray::create(cu, numAcceptors, "customHbondAcceptors");
donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0)
globals = CudaArray::create(cu, force.getNumGlobalParameters(), "customHbondGlobals");
vector > donorParamVector(numDonors);
vector donorVector(numDonors);
for (int i = 0; i < numDonors; i++) {
vector parameters;
force.getDonorParameters(startIndex+i, donorVector[i].x, donorVector[i].y, donorVector[i].z, parameters);
donorParamVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
donorParamVector[i][j] = (float) parameters[j];
}
donors->upload(donorVector);
donorParams->setParameterValues(donorParamVector);
vector > acceptorParamVector(numAcceptors);
vector acceptorVector(numAcceptors);
for (int i = 0; i < numAcceptors; i++) {
vector parameters;
force.getAcceptorParameters(i, acceptorVector[i].x, acceptorVector[i].y, acceptorVector[i].z, parameters);
acceptorParamVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
acceptorParamVector[i][j] = (float) parameters[j];
}
acceptors->upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector);
cu.addForce(new CudaCustomHbondForceInfo(force));
// Record exclusions.
vector donorExclusionVector(numDonors, make_int4(-1, -1, -1, -1));
vector acceptorExclusionVector(numAcceptors, make_int4(-1, -1, -1, -1));
for (int i = 0; i < force.getNumExclusions(); i++) {
int donor, acceptor;
force.getExclusionParticles(i, donor, acceptor);
if (donor < startIndex || donor >= endIndex)
continue;
donor -= startIndex;
if (donorExclusionVector[donor].x == -1)
donorExclusionVector[donor].x = acceptor;
else if (donorExclusionVector[donor].y == -1)
donorExclusionVector[donor].y = acceptor;
else if (donorExclusionVector[donor].z == -1)
donorExclusionVector[donor].z = acceptor;
else if (donorExclusionVector[donor].w == -1)
donorExclusionVector[donor].w = acceptor;
else
throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per donor");
if (acceptorExclusionVector[acceptor].x == -1)
acceptorExclusionVector[acceptor].x = donor;
else if (acceptorExclusionVector[acceptor].y == -1)
acceptorExclusionVector[acceptor].y = donor;
else if (acceptorExclusionVector[acceptor].z == -1)
acceptorExclusionVector[acceptor].z = donor;
else if (acceptorExclusionVector[acceptor].w == -1)
acceptorExclusionVector[acceptor].w = donor;
else
throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
}
donorExclusions = CudaArray::create(cu, numDonors, "customHbondDonorExclusions");
acceptorExclusions = CudaArray::create(cu, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions->upload(donorExclusionVector);
acceptorExclusions->upload(acceptorExclusionVector);
// Record the tabulated functions.
CudaExpressionUtilities::FunctionPlaceholder fp;
map functions;
vector > functionDefinitions;
vector tabulatedFunctionParamsVec(force.getNumFunctions());
stringstream tableArgs;
for (int i = 0; i < force.getNumFunctions(); i++) {
string name;
vector values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
string arrayName = "table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = &fp;
tabulatedFunctionParamsVec[i] = make_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(CudaArray::create(cu, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
tableArgs << ", const float4* __restrict__ " << arrayName;
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = CudaArray::create(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters");
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
tableArgs << ", const float4* __restrict__ functionParams";
}
// Record information about parameters.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals != NULL)
globals->upload(globalParamValues);
map