/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "CudaKernels.h"
#include "CudaForceInfo.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/Context.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaIntegrationUtilities.h"
//#include "CudaNonbondedUtilities.h"
#include "CudaKernelSources.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "../src/SimTKUtilities/SimTKOpenMMUtilities.h"
#include
#include
using namespace OpenMM;
using namespace std;
using Lepton::ExpressionTreeNode;
using Lepton::Operation;
static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
const Lepton::Operation& op = expression.getRootNode().getOperation();
if (op.getId() != Lepton::Operation::CONSTANT)
return false;
return (dynamic_cast(op).getValue() == 0.0);
}
static bool usesVariable(const Lepton::ExpressionTreeNode& node, const string& variable) {
const Lepton::Operation& op = node.getOperation();
if (op.getId() == Lepton::Operation::VARIABLE && op.getName() == variable)
return true;
for (int i = 0; i < (int) node.getChildren().size(); i++)
if (usesVariable(node.getChildren()[i], variable))
return true;
return false;
}
static bool usesVariable(const Lepton::ParsedExpression& expression, const string& variable) {
return usesVariable(expression.getRootNode(), variable);
}
static pair makeVariable(const string& name, const string& value) {
return make_pair(ExpressionTreeNode(new Operation::Variable(name)), value);
}
void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cuCtxSetCurrent(cu.getContext());
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// bool includeNonbonded = ((groups&(1<& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setTime(time);
}
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector& positions) {
cuCtxSetCurrent(cu.getContext());
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
double4 periodicBoxSize = cu.getPeriodicBoxSize();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
double4 pos = posq[i];
int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
}
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
float4 pos = posq[i];
int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
}
}
}
void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector& positions) {
cuCtxSetCurrent(cu.getContext());
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
double4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = p[0];
pos.y = p[1];
pos.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
posq[i] = make_double4(0.0, 0.0, 0.0, 0.0);
cu.getPosq().upload(posq);
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
float4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = p[0];
pos.y = p[1];
pos.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
posq[i] = make_float4(0.0, 0.0, 0.0, 0.0);
cu.getPosq().upload(posq);
}
for (int i = 0; i < (int) cu.getPosCellOffsets().size(); i++)
cu.getPosCellOffsets()[i] = make_int4(0, 0, 0, 0);
}
void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector& velocities) {
cuCtxSetCurrent(cu.getContext());
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles);
if (cu.getUseDoublePrecision()) {
double4* velm = (double4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
double4 vel = velm[i];
int4 offset = cu.getPosCellOffsets()[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
else {
float4* velm = (float4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
float4 vel = velm[i];
int4 offset = cu.getPosCellOffsets()[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
}
void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector& velocities) {
cuCtxSetCurrent(cu.getContext());
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) {
double4* velm = (double4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
double4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
velm[i] = make_double4(0.0, 0.0, 0.0, 0.0);
cu.getVelm().upload(velm);
}
else {
float4* velm = (float4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
float4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
velm[i] = make_float4(0.0, 0.0, 0.0, 0.0);
cu.getVelm().upload(velm);
}
}
void CudaUpdateStateDataKernel::getForces(ContextImpl& context, vector& forces) {
cuCtxSetCurrent(cu.getContext());
long long* force = (long long*) cu.getPinnedBuffer();
cu.getForce().download(force);
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
int paddedNumParticles = cu.getPaddedNumAtoms();
forces.resize(numParticles);
double scale = 1.0/(double) 0xFFFFFFFF;
for (int i = 0; i < numParticles; ++i)
forces[order[i]] = Vec3(scale*force[i], scale*force[i+paddedNumParticles], scale*force[i+paddedNumParticles*2]);
}
void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
double4 box = cu.getPeriodicBoxSize();
a = Vec3(box.x, 0, 0);
b = Vec3(0, box.y, 0);
c = Vec3(0, 0, box.z);
}
void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
vector& contexts = cu.getPlatformData().contexts;
for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->setPeriodicBoxSize(a[0], b[1], c[2]);
}
void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cuCtxSetCurrent(cu.getContext());
// int version = 1;
// stream.write((char*) &version, sizeof(int));
// double time = cu.getTime();
// stream.write((char*) &time, sizeof(double));
// cu.getPosq().download();
// stream.write((char*) &cu.getPosq()[0], sizeof(mm_float4)*cu.getPosq().getSize());
// cu.getVelm().download();
// stream.write((char*) &cu.getVelm()[0], sizeof(mm_float4)*cu.getVelm().getSize());
// stream.write((char*) &cu.getAtomIndex()[0], sizeof(cl_int)*cu.getAtomIndex().getSize());
// stream.write((char*) &cu.getPosCellOffsets()[0], sizeof(mm_int4)*cu.getPosCellOffsets().size());
// mm_float4 box = cu.getPeriodicBoxSize();
// stream.write((char*) &box, sizeof(mm_float4));
// cu.getIntegrationUtilities().createCheckpoint(stream);
// SimTKOpenMMUtilities::createCheckpoint(stream);
}
void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cuCtxSetCurrent(cu.getContext());
// int version;
// stream.read((char*) &version, sizeof(int));
// if (version != 1)
// throw OpenMMException("Checkpoint was created with a different version of OpenMM");
// double time;
// stream.read((char*) &time, sizeof(double));
// vector& contexts = cu.getPlatformData().contexts;
// for (int i = 0; i < (int) contexts.size(); i++)
// contexts[i]->setTime(time);
// stream.read((char*) &cu.getPosq()[0], sizeof(mm_float4)*cu.getPosq().getSize());
// cu.getPosq().upload();
// stream.read((char*) &cu.getVelm()[0], sizeof(mm_float4)*cu.getVelm().getSize());
// cu.getVelm().upload();
// stream.read((char*) &cu.getAtomIndex()[0], sizeof(cl_int)*cu.getAtomIndex().getSize());
// cu.getAtomIndex().upload();
// stream.read((char*) &cu.getPosCellOffsets()[0], sizeof(mm_int4)*cu.getPosCellOffsets().size());
// mm_float4 box;
// stream.read((char*) &box, sizeof(mm_float4));
// for (int i = 0; i < (int) contexts.size(); i++)
// contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z);
// cu.getIntegrationUtilities().loadCheckpoint(stream);
// SimTKOpenMMUtilities::loadCheckpoint(stream);
// for (int i = 0; i < cu.getReorderListeners().size(); i++)
// cu.getReorderListeners()[i]->execute();
}
void CudaApplyConstraintsKernel::initialize(const System& system) {
}
void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
// if (!hasInitializedKernel) {
// hasInitializedKernel = true;
// map defines;
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// cu::Program program = cu.createProgram(CudaKernelSources::constraints, defines);
// applyDeltasKernel = cu::Kernel(program, "applyPositionDeltas");
// applyDeltasKernel.setArg(0, cu.getPosq().getDevicePointer());
// applyDeltasKernel.setArg(1, cu.getIntegrationUtilities().getPosDelta().getDevicePointer());
// }
// CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
// cu.clearBuffer(integration.getPosDelta());
// integration.applyConstraints(tol);
// cu.executeKernel(applyDeltasKernel, cu.getNumAtoms());
// integration.computeVirtualSites();
}
void CudaVirtualSitesKernel::initialize(const System& system) {
}
void CudaVirtualSitesKernel::computePositions(ContextImpl& context) {
cu.getIntegrationUtilities().computeVirtualSites();
}
class CudaHarmonicBondForceInfo : public CudaForceInfo {
public:
CudaHarmonicBondForceInfo(const HarmonicBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double length, k;
force.getBondParameters(index, particle1, particle2, length, k);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double length1, length2, k1, k2;
force.getBondParameters(group1, particle1, particle2, length1, k1);
force.getBondParameters(group2, particle1, particle2, length2, k2);
return (length1 == length2 && k1 == k2);
}
private:
const HarmonicBondForce& force;
};
CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
if (params != NULL)
delete params;
}
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
vector > atoms(numBonds, vector(2));
params = CudaArray::create(numBonds, "bondParams");
vector paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params->upload(paramVector);
map replacements;
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
cu.addForce(new CudaHarmonicBondForceInfo(force));
}
double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) {
cuCtxSetCurrent(cu.getContext());
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
if (numBonds != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
// Record the per-bond parameters.
vector paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
int atom1, atom2;
double length, k;
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomBondForceInfo : public CudaForceInfo {
public:
CudaCustomBondForceInfo(const CustomBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
vector parameters;
force.getBondParameters(index, particle1, particle2, parameters);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
vector parameters1, parameters2;
force.getBondParameters(group1, particle1, particle2, parameters1);
force.getBondParameters(group2, particle1, particle2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomBondForce& force;
};
CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
vector > atoms(numBonds, vector(2));
params = new CudaParameterSet(cu, force.getNumPerBondParameters(), numBonds, "customBondParams");
vector > paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
vector parameters;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomBondForceInfo(force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdR = "] = forceExpression;
// Create the kernels.
map variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create(force.getNumGlobalParameters(), "customBondGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< > functions;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
}
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) {
cuCtxSetCurrent(cu.getContext());
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
if (numBonds != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
// Record the per-bond parameters.
vector > paramVector(numBonds);
vector parameters;
for (int i = 0; i < numBonds; i++) {
int atom1, atom2;
force.getBondParameters(startIndex+i, atom1, atom2, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaHarmonicAngleForceInfo : public CudaForceInfo {
public:
CudaHarmonicAngleForceInfo(const HarmonicAngleForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(index, particle1, particle2, particle3, angle, k);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
double angle1, angle2, k1, k2;
force.getAngleParameters(group1, particle1, particle2, particle3, angle1, k1);
force.getAngleParameters(group2, particle1, particle2, particle3, angle2, k2);
return (angle1 == angle2 && k1 == k2);
}
private:
const HarmonicAngleForce& force;
};
CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
if (params != NULL)
delete params;
}
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
vector > atoms(numAngles, vector(3));
params = CudaArray::create(numAngles, "angleParams");
vector paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
double angle, k;
force.getAngleParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
}
params->upload(paramVector);
map replacements;
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
cu.addForce(new CudaHarmonicAngleForceInfo(force));
}
double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) {
cuCtxSetCurrent(cu.getContext());
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
if (numAngles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of angles has changed");
// Record the per-angle parameters.
vector paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
int atom1, atom2, atom3;
double angle, k;
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomAngleForceInfo : public CudaForceInfo {
public:
CudaCustomAngleForceInfo(const CustomAngleForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3;
vector parameters;
force.getAngleParameters(index, particle1, particle2, particle3, parameters);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
vector parameters1, parameters2;
force.getAngleParameters(group1, particle1, particle2, particle3, parameters1);
force.getAngleParameters(group2, particle1, particle2, particle3, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomAngleForce& force;
};
CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
vector > atoms(numAngles, vector(3));
params = new CudaParameterSet(cu, force.getNumPerAngleParameters(), numAngles, "customAngleParams");
vector > paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
vector parameters;
force.getAngleParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomAngleForceInfo(force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerAngleParameters(); i++) {
const string& name = force.getPerAngleParameterName(i);
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create(force.getNumGlobalParameters(), "customAngleGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< > functions;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
}
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) {
cuCtxSetCurrent(cu.getContext());
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
if (numAngles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of angles has changed");
// Record the per-angle parameters.
vector > paramVector(numAngles);
vector parameters;
for (int i = 0; i < numAngles; i++) {
int atom1, atom2, atom3;
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaPeriodicTorsionForceInfo : public CudaForceInfo {
public:
CudaPeriodicTorsionForceInfo(const PeriodicTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, periodicity, phase, k);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4, periodicity1, periodicity2;
double phase1, phase2, k1, k2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, periodicity1, phase1, k1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, periodicity2, phase2, k2);
return (periodicity1 == periodicity2 && phase1 == phase2 && k1 == k2);
}
private:
const PeriodicTorsionForce& force;
};
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
if (params != NULL)
delete params;
}
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params = CudaArray::create(numTorsions, "periodicTorsionParams");
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params->upload(paramVector);
map replacements;
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaPeriodicTorsionForceInfo(force));
}
double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) {
cuCtxSetCurrent(cu.getContext());
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
// Record the per-torsion parameters.
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4, periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params->upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaRBTorsionForceInfo : public CudaForceInfo {
public:
CudaRBTorsionForceInfo(const RBTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
double c0a, c0b, c1a, c1b, c2a, c2b, c3a, c3b, c4a, c4b, c5a, c5b;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, c0a, c1a, c2a, c3a, c4a, c5a);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, c0b, c1b, c2b, c3b, c4b, c5b);
return (c0a == c0b && c1a == c1b && c2a == c2b && c3a == c3b && c4a == c4b && c5a == c5b);
}
private:
const RBTorsionForce& force;
};
CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
if (params1 != NULL)
delete params1;
if (params2 != NULL)
delete params2;
}
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params1 = CudaArray::create(numTorsions, "rbTorsionParams1");
params2 = CudaArray::create(numTorsions, "rbTorsionParams2");
vector paramVector1(numTorsions);
vector paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], c0, c1, c2, c3, c4, c5);
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1->upload(paramVector1);
params2->upload(paramVector2);
map replacements;
replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1->getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaRBTorsionForceInfo(force));
}
double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) {
cuCtxSetCurrent(cu.getContext());
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
// Record the per-torsion parameters.
vector paramVector1(numTorsions);
vector paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1->upload(paramVector1);
params2->upload(paramVector2);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCMAPTorsionForceInfo : public CudaForceInfo {
public:
CudaCMAPTorsionForceInfo(const CMAPTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int map, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(index, map, a1, a2, a3, a4, b1, b2, b3, b4);
particles.resize(8);
particles[0] = a1;
particles[1] = a2;
particles[2] = a3;
particles[3] = a4;
particles[4] = b1;
particles[5] = b2;
particles[6] = b3;
particles[7] = b4;
}
bool areGroupsIdentical(int group1, int group2) {
int map1, map2, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(group1, map1, a1, a2, a3, a4, b1, b2, b3, b4);
force.getTorsionParameters(group2, map2, a1, a2, a3, a4, b1, b2, b3, b4);
return (map1 == map2);
}
private:
const CMAPTorsionForce& force;
};
CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
if (coefficients != NULL)
delete coefficients;
if (mapPositions != NULL)
delete mapPositions;
if (torsionMaps != NULL)
delete torsionMaps;
}
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
int numMaps = force.getNumMaps();
vector coeffVec;
vector mapPositionsVec(numMaps);
vector energy;
vector > c;
int currentPosition = 0;
for (int i = 0; i < numMaps; i++) {
int size;
force.getMapParameters(i, size, energy);
CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
mapPositionsVec[i] = make_int2(currentPosition, size);
currentPosition += 4*size*size;
for (int j = 0; j < size*size; j++) {
coeffVec.push_back(make_float4((float) c[j][0], (float) c[j][1], (float) c[j][2], (float) c[j][3]));
coeffVec.push_back(make_float4((float) c[j][4], (float) c[j][5], (float) c[j][6], (float) c[j][7]));
coeffVec.push_back(make_float4((float) c[j][8], (float) c[j][9], (float) c[j][10], (float) c[j][11]));
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
vector > atoms(numTorsions, vector(8));
vector torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients = CudaArray::create(coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = CudaArray::create(numMaps, "cmapTorsionMapPositions");
torsionMaps = CudaArray::create(numTorsions, "cmapTorsionMaps");
coefficients->upload(coeffVec);
mapPositions->upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec);
map replacements;
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients->getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions->getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps->getDevicePointer(), "int");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaCMAPTorsionForceInfo(force));
}
double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
class CudaCustomTorsionForceInfo : public CudaForceInfo {
public:
CudaCustomTorsionForceInfo(const CustomTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4;
vector parameters;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, parameters);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
vector parameters1, parameters2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, parameters1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomTorsionForce& force;
};
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
}
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params = new CudaParameterSet(cu, force.getNumPerTorsionParameters(), numTorsions, "customTorsionParams");
vector > paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
vector parameters;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomTorsionForceInfo(force));
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerTorsionParameters(); i++) {
const string& name = force.getPerTorsionParameterName(i);
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create(force.getNumGlobalParameters(), "customTorsionGlobals");
globals->upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< > functions;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, "temp", "");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
}
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) {
cuCtxSetCurrent(cu.getContext());
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
// Record the per-torsion parameters.
vector > paramVector(numTorsions);
vector parameters;
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
//class CudaNonbondedForceInfo : public CudaForceInfo {
//public:
// CudaNonbondedForceInfo(int requiredBuffers, const NonbondedForce& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
// force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
// force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
// return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
// }
// int getNumParticleGroups() {
// return force.getNumExceptions();
// }
// void getParticlesInGroup(int index, vector& particles) {
// int particle1, particle2;
// double chargeProd, sigma, epsilon;
// force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
// particles.resize(2);
// particles[0] = particle1;
// particles[1] = particle2;
// }
// bool areGroupsIdentical(int group1, int group2) {
// int particle1, particle2;
// double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
// force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
// force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
// return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
// }
//private:
// const NonbondedForce& force;
//};
//
//CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
// if (sigmaEpsilon != NULL)
// delete sigmaEpsilon;
// if (exceptionParams != NULL)
// delete exceptionParams;
// if (cosSinSums != NULL)
// delete cosSinSums;
// if (pmeGrid != NULL)
// delete pmeGrid;
// if (pmeGrid2 != NULL)
// delete pmeGrid2;
// if (pmeBsplineModuliX != NULL)
// delete pmeBsplineModuliX;
// if (pmeBsplineModuliY != NULL)
// delete pmeBsplineModuliY;
// if (pmeBsplineModuliZ != NULL)
// delete pmeBsplineModuliZ;
// if (pmeBsplineTheta != NULL)
// delete pmeBsplineTheta;
// if (pmeBsplineDTheta != NULL)
// delete pmeBsplineDTheta;
// if (pmeAtomRange != NULL)
// delete pmeAtomRange;
// if (pmeAtomGridIndex != NULL)
// delete pmeAtomGridIndex;
// if (sort != NULL)
// delete sort;
// if (fft != NULL)
// delete fft;
//}
//
//void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
//
// // Identify which exceptions are 1-4 interactions.
//
// vector > exclusions;
// vector exceptions;
// for (int i = 0; i < force.getNumExceptions(); i++) {
// int particle1, particle2;
// double chargeProd, sigma, epsilon;
// force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
// exclusions.push_back(pair(particle1, particle2));
// if (chargeProd != 0.0 || epsilon != 0.0)
// exceptions.push_back(i);
// }
//
// // Initialize nonbonded interactions.
//
// int numParticles = force.getNumParticles();
// sigmaEpsilon = new CudaArray(cu, numParticles, "sigmaEpsilon");
// CudaArray& posq = cu.getPosq();
// vector sigmaEpsilonVector(numParticles);
// vector > exclusionList(numParticles);
// double sumSquaredCharges = 0.0;
// hasCoulomb = false;
// hasLJ = false;
// for (int i = 0; i < numParticles; i++) {
// double charge, sigma, epsilon;
// force.getParticleParameters(i, charge, sigma, epsilon);
// posq[i].w = (float) charge;
// sigmaEpsilonVector[i] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
// exclusionList[i].push_back(i);
// sumSquaredCharges += charge*charge;
// if (charge != 0.0)
// hasCoulomb = true;
// if (epsilon != 0.0)
// hasLJ = true;
// }
// for (int i = 0; i < (int) exclusions.size(); i++) {
// exclusionList[exclusions[i].first].push_back(exclusions[i].second);
// exclusionList[exclusions[i].second].push_back(exclusions[i].first);
// }
// posq.upload();
// sigmaEpsilon->upload(sigmaEpsilonVector);
// bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
// map defines;
// defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
// defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
// if (useCutoff) {
// // Compute the reaction field constants.
//
// double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
// double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
// defines["REACTION_FIELD_K"] = cu.doubleToString(reactionFieldK);
// defines["REACTION_FIELD_C"] = cu.doubleToString(reactionFieldC);
// }
// if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0)
// dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
// else
// dispersionCoefficient = 0.0;
// alpha = 0;
// if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// // Compute the Ewald parameters.
//
// int kmaxx, kmaxy, kmaxz;
// NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
// defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
// defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
// defines["USE_EWALD"] = "1";
// ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
//
// // Create the reciprocal space kernels.
//
// map replacements;
// replacements["NUM_ATOMS"] = cu.intToString(numParticles);
// replacements["KMAX_X"] = cu.intToString(kmaxx);
// replacements["KMAX_Y"] = cu.intToString(kmaxy);
// replacements["KMAX_Z"] = cu.intToString(kmaxz);
// replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
// cu::Program program = cu.createProgram(CudaKernelSources::ewald, replacements);
// ewaldSumsKernel = cu::Kernel(program, "calculateEwaldCosSinSums");
// ewaldForcesKernel = cu::Kernel(program, "calculateEwaldForces");
// cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), "cosSinSums");
// }
// else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// // Compute the PME parameters.
//
// int gridSizeX, gridSizeY, gridSizeZ;
// NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
// gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
// gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
// gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
// defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
// defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
// defines["USE_EWALD"] = "1";
// ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
// pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
// pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
// pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
// pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
// pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
// pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
// pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
//
// // Create required data structures.
//
// pmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid");
// cu.addAutoclearBuffer(pmeGrid->getDevicePointer(), pmeGrid->getSize()*2);
// pmeGrid2 = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid2");
// pmeBsplineModuliX = new CudaArray(cu, gridSizeX, "pmeBsplineModuliX");
// pmeBsplineModuliY = new CudaArray(cu, gridSizeY, "pmeBsplineModuliY");
// pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, "pmeBsplineModuliZ");
// pmeBsplineTheta = new CudaArray(cu, PmeOrder*numParticles, "pmeBsplineTheta");
// bool deviceIsCpu = (cu.getDevice().getInfo() == CL_DEVICE_TYPE_CPU);
// if (deviceIsCpu)
// pmeBsplineDTheta = new CudaArray(cu, PmeOrder*numParticles, "pmeBsplineDTheta");
// pmeAtomRange = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
// pmeAtomGridIndex = new CudaArray(cu, numParticles, "pmeAtomGridIndex");
// sort = new CudaSort(cu, cu.getNumAtoms());
// fft = new CudaFFT3D(cu, gridSizeX, gridSizeY, gridSizeZ);
//
// // Initialize the b-spline moduli.
//
// int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
// vector data(PmeOrder);
// vector ddata(PmeOrder);
// vector bsplines_data(maxSize);
// data[PmeOrder-1] = 0.0;
// data[1] = 0.0;
// data[0] = 1.0;
// for (int i = 3; i < PmeOrder; i++) {
// double div = 1.0/(i-1.0);
// data[i-1] = 0.0;
// for (int j = 1; j < (i-1); j++)
// data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
// data[0] = div*data[0];
// }
//
// // Differentiate.
//
// ddata[0] = -data[0];
// for (int i = 1; i < PmeOrder; i++)
// ddata[i] = data[i-1]-data[i];
// double div = 1.0/(PmeOrder-1);
// data[PmeOrder-1] = 0.0;
// for (int i = 1; i < (PmeOrder-1); i++)
// data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
// data[0] = div*data[0];
// for (int i = 0; i < maxSize; i++)
// bsplines_data[i] = 0.0;
// for (int i = 1; i <= PmeOrder; i++)
// bsplines_data[i] = data[i-1];
//
// // Evaluate the actual bspline moduli for X/Y/Z.
//
// for(int dim = 0; dim < 3; dim++) {
// int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
// vector moduli(ndata);
// for (int i = 0; i < ndata; i++) {
// double sc = 0.0;
// double ss = 0.0;
// for (int j = 0; j < ndata; j++) {
// double arg = (2.0*M_PI*i*j)/ndata;
// sc += bsplines_data[j]*cos(arg);
// ss += bsplines_data[j]*sin(arg);
// }
// moduli[i] = (float) (sc*sc+ss*ss);
// }
// for (int i = 0; i < ndata; i++)
// {
// if (moduli[i] < 1.0e-7)
// moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
// }
// if (dim == 0)
// pmeBsplineModuliX->upload(moduli);
// else if (dim == 1)
// pmeBsplineModuliY->upload(moduli);
// else
// pmeBsplineModuliZ->upload(moduli);
// }
// }
// else
// ewaldSelfEnergy = 0.0;
//
// // Add the interaction to the default nonbonded kernel.
//
// string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
// cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
// if (hasLJ)
// cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon->getDevicePointer()));
//
// // Initialize the exceptions.
//
// int numContexts = cu.getPlatformData().contexts.size();
// int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
// int numExceptions = endIndex-startIndex;
// if (numExceptions > 0) {
// exceptionAtoms.resize(numExceptions);
// vector > atoms(numExceptions, vector(2));
// exceptionParams = new CudaArray(cu, numExceptions, "exceptionParams");
// vector exceptionParamsVector(numExceptions);
// for (int i = 0; i < numExceptions; i++) {
// double chargeProd, sigma, epsilon;
// force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
// exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
// exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
// }
// exceptionParams->upload(exceptionParamsVector);
// map replacements;
// replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams->getDevicePointer(), "float4");
// cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
// }
// cu.addForce(new CudaNonbondedForceInfo(cu.getNonbondedUtilities().getNumForceBuffers(), force));
//}
//
//double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// bool deviceIsCpu = (cu.getDevice().getInfo() == CL_DEVICE_TYPE_CPU);
// if (!hasInitializedKernel) {
// hasInitializedKernel = true;
// if (cosSinSums != NULL) {
// ewaldSumsKernel.setArg(0, cu.getEnergyBuffer().getDevicePointer());
// ewaldSumsKernel.setArg(1, cu.getPosq().getDevicePointer());
// ewaldSumsKernel.setArg(2, cosSinSums->getDevicePointer());
// ewaldForcesKernel.setArg(0, cu.getForceBuffers().getDevicePointer());
// ewaldForcesKernel.setArg(1, cu.getPosq().getDevicePointer());
// ewaldForcesKernel.setArg(2, cosSinSums->getDevicePointer());
// }
// if (pmeGrid != NULL) {
// string file = (deviceIsCpu ? CudaKernelSources::pme_cpu : CudaKernelSources::pme);
// cu::Program program = cu.createProgram(file, pmeDefines);
// pmeUpdateBsplinesKernel = cu::Kernel(program, "updateBsplines");
// pmeAtomRangeKernel = cu::Kernel(program, "findAtomRangeForGrid");
// if (!deviceIsCpu)
// pmeZIndexKernel = cu::Kernel(program, "recordZIndex");
// pmeSpreadChargeKernel = cu::Kernel(program, "gridSpreadCharge");
// pmeConvolutionKernel = cu::Kernel(program, "reciprocalConvolution");
// pmeInterpolateForceKernel = cu::Kernel(program, "gridInterpolateForce");
// pmeUpdateBsplinesKernel.setArg(0, cu.getPosq().getDevicePointer());
// pmeUpdateBsplinesKernel.setArg(1, pmeBsplineTheta->getDevicePointer());
// pmeUpdateBsplinesKernel.setArg(2, CudaContext::ThreadBlockSize*PmeOrder*sizeof(mm_float4), NULL);
// pmeUpdateBsplinesKernel.setArg(3, pmeAtomGridIndex->getDevicePointer());
// if (deviceIsCpu)
// pmeUpdateBsplinesKernel.setArg(6, pmeBsplineDTheta->getDevicePointer());
// pmeAtomRangeKernel.setArg(0, pmeAtomGridIndex->getDevicePointer());
// pmeAtomRangeKernel.setArg(1, pmeAtomRange->getDevicePointer());
// pmeAtomRangeKernel.setArg(2, cu.getPosq().getDevicePointer());
// if (!deviceIsCpu) {
// pmeZIndexKernel.setArg(0, pmeAtomGridIndex->getDevicePointer());
// pmeZIndexKernel.setArg(1, cu.getPosq().getDevicePointer());
// }
// pmeSpreadChargeKernel.setArg(0, cu.getPosq().getDevicePointer());
// pmeSpreadChargeKernel.setArg(1, pmeAtomGridIndex->getDevicePointer());
// pmeSpreadChargeKernel.setArg(2, pmeAtomRange->getDevicePointer());
// pmeSpreadChargeKernel.setArg(3, pmeGrid->getDevicePointer());
// pmeSpreadChargeKernel.setArg(4, pmeBsplineTheta->getDevicePointer());
// pmeConvolutionKernel.setArg(0, pmeGrid2->getDevicePointer());
// pmeConvolutionKernel.setArg(1, cu.getEnergyBuffer().getDevicePointer());
// pmeConvolutionKernel.setArg(2, pmeBsplineModuliX->getDevicePointer());
// pmeConvolutionKernel.setArg(3, pmeBsplineModuliY->getDevicePointer());
// pmeConvolutionKernel.setArg(4, pmeBsplineModuliZ->getDevicePointer());
// interpolateForceThreads = (cu.getDevice().getInfo() > 2*128*PmeOrder*sizeof(mm_float4) ? 128 : 64);
// pmeInterpolateForceKernel.setArg(0, cu.getPosq().getDevicePointer());
// pmeInterpolateForceKernel.setArg(1, cu.getForceBuffers().getDevicePointer());
// pmeInterpolateForceKernel.setArg(2, pmeGrid->getDevicePointer());
// if (deviceIsCpu) {
// pmeInterpolateForceKernel.setArg(5, pmeBsplineTheta->getDevicePointer());
// pmeInterpolateForceKernel.setArg(6, pmeBsplineDTheta->getDevicePointer());
// }
// else
// pmeInterpolateForceKernel.setArg(5, 2*interpolateForceThreads*PmeOrder*sizeof(mm_float4), NULL);
// if (cu.getSupports64BitGlobalAtomics()) {
// pmeFinishSpreadChargeKernel = cu::Kernel(program, "finishSpreadCharge");
// pmeFinishSpreadChargeKernel.setArg(0, pmeGrid->getDevicePointer());
// }
// }
// }
// if (cosSinSums != NULL && cu.getContextIndex() == 0 && includeReciprocal) {
// mm_float4 boxSize = cu.getPeriodicBoxSize();
// mm_float4 recipBoxSize = mm_float4((float) (2*M_PI/boxSize.x), (float) (2*M_PI/boxSize.y), (float) (2*M_PI/boxSize.z), 0);
// float recipCoefficient = (float) (ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z));
// ewaldSumsKernel.setArg(3, recipBoxSize);
// ewaldSumsKernel.setArg(4, recipCoefficient);
// cu.executeKernel(ewaldSumsKernel, cosSinSums->getSize());
// ewaldForcesKernel.setArg(3, recipBoxSize);
// ewaldForcesKernel.setArg(4, recipCoefficient);
// cu.executeKernel(ewaldForcesKernel, cu.getNumAtoms());
// }
// if (pmeGrid != NULL && cu.getContextIndex() == 0 && includeReciprocal) {
// mm_float4 boxSize = cu.getPeriodicBoxSize();
// mm_float4 invBoxSize = cu.getInvPeriodicBoxSize();
// pmeUpdateBsplinesKernel.setArg(4, boxSize);
// pmeUpdateBsplinesKernel.setArg(5, invBoxSize);
// cu.executeKernel(pmeUpdateBsplinesKernel, cu.getNumAtoms());
// if (deviceIsCpu) {
// pmeSpreadChargeKernel.setArg(5, boxSize);
// pmeSpreadChargeKernel.setArg(6, invBoxSize);
// cu.executeKernel(pmeSpreadChargeKernel, 2*cu.getDevice().getInfo(), 1);
// }
// else {
// sort->sort(*pmeAtomGridIndex);
// pmeAtomRangeKernel.setArg(3, boxSize);
// pmeAtomRangeKernel.setArg(4, invBoxSize);
// cu.executeKernel(pmeAtomRangeKernel, cu.getNumAtoms());
// if (cu.getSupports64BitGlobalAtomics()) {
// pmeSpreadChargeKernel.setArg(5, boxSize);
// pmeSpreadChargeKernel.setArg(6, invBoxSize);
// cu.executeKernel(pmeSpreadChargeKernel, cu.getNumAtoms(), PmeOrder*PmeOrder*PmeOrder);
// cu.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
// }
// else {
// pmeZIndexKernel.setArg(2, boxSize);
// pmeZIndexKernel.setArg(3, invBoxSize);
// cu.executeKernel(pmeZIndexKernel, cu.getNumAtoms());
// cu.executeKernel(pmeSpreadChargeKernel, cu.getNumAtoms());
// }
// }
// fft->execFFT(*pmeGrid, *pmeGrid2, true);
// pmeConvolutionKernel.setArg(5, invBoxSize);
// pmeConvolutionKernel.setArg(6, (float) (1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z)));
// cu.executeKernel(pmeConvolutionKernel, cu.getNumAtoms());
// fft->execFFT(*pmeGrid2, *pmeGrid, false);
// pmeInterpolateForceKernel.setArg(3, boxSize);
// pmeInterpolateForceKernel.setArg(4, invBoxSize);
// cu.executeKernel(pmeInterpolateForceKernel, cu.getNumAtoms(), interpolateForceThreads);
// }
// double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
// if (dispersionCoefficient != 0.0 && includeDirect) {
// mm_float4 boxSize = cu.getPeriodicBoxSize();
// energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
// }
// return energy;
//}
//
//void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
// // Make sure the new parameters are acceptable.
//
// cuCtxSetCurrent(cu.getContext());
// if (force.getNumParticles() != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed");
// if (!hasCoulomb || !hasLJ) {
// for (int i = 0; i < force.getNumParticles(); i++) {
// double charge, sigma, epsilon;
// force.getParticleParameters(i, charge, sigma, epsilon);
// if (!hasCoulomb && charge != 0.0)
// throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Coulomb interactions, because all charges were originally 0");
// if (!hasLJ && epsilon != 0.0)
// throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0");
// }
// }
// vector exceptions;
// for (int i = 0; i < force.getNumExceptions(); i++) {
// int particle1, particle2;
// double chargeProd, sigma, epsilon;
// force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
// if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()])
// exceptions.push_back(i);
// else if (chargeProd != 0.0 || epsilon != 0.0)
// throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
// }
// int numContexts = cu.getPlatformData().contexts.size();
// int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
// int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
// int numExceptions = endIndex-startIndex;
//
// // Record the per-particle parameters.
//
// CudaArray& posq = cu.getPosq();
// posq.download();
// vector sigmaEpsilonVector(force.getNumParticles());
// double sumSquaredCharges = 0.0;
// CudaArray& order = cu.getAtomIndex();
// for (int i = 0; i < force.getNumParticles(); i++) {
// int index = order[i];
// double charge, sigma, epsilon;
// force.getParticleParameters(index, charge, sigma, epsilon);
// posq[i].w = (float) charge;
// sigmaEpsilonVector[index] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
// sumSquaredCharges += charge*charge;
// }
// posq.upload();
// sigmaEpsilon->upload(sigmaEpsilonVector);
//
// // Record the exceptions.
//
// if (numExceptions > 0) {
// vector > atoms(numExceptions, vector(2));
// vector exceptionParamsVector(numExceptions);
// for (int i = 0; i < numExceptions; i++) {
// double chargeProd, sigma, epsilon;
// force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
// exceptionParamsVector[i] = mm_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
// }
// exceptionParams->upload(exceptionParamsVector);
// }
//
// // Compute other values.
//
// NonbondedForce::NonbondedMethod method = force.getNonbondedMethod();
// if (method == NonbondedForce::Ewald || method == NonbondedForce::PME)
// ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
// if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (method == NonbondedForce::CutoffPeriodic || method == NonbondedForce::Ewald || method == NonbondedForce::PME))
// dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
// cu.invalidateMolecules();
//}
//
//class CudaCustomNonbondedForceInfo : public CudaForceInfo {
//public:
// CudaCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// vector params1;
// vector params2;
// force.getParticleParameters(particle1, params1);
// force.getParticleParameters(particle2, params2);
// for (int i = 0; i < (int) params1.size(); i++)
// if (params1[i] != params2[i])
// return false;
// return true;
// }
// int getNumParticleGroups() {
// return force.getNumExclusions();
// }
// void getParticlesInGroup(int index, vector& particles) {
// int particle1, particle2;
// force.getExclusionParticles(index, particle1, particle2);
// particles.resize(2);
// particles[0] = particle1;
// particles[1] = particle2;
// }
// bool areGroupsIdentical(int group1, int group2) {
// return true;
// }
//private:
// const CustomNonbondedForce& force;
//};
//
//CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
// if (params != NULL)
// delete params;
// if (globals != NULL)
// delete globals;
// if (tabulatedFunctionParams != NULL)
// delete tabulatedFunctionParams;
// for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
// delete tabulatedFunctions[i];
//}
//
//void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
// int forceIndex;
// for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
// ;
// string prefix = "custom"+cu.intToString(forceIndex)+"_";
//
// // Record parameters and exclusions.
//
// int numParticles = force.getNumParticles();
// params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
// if (force.getNumGlobalParameters() > 0)
// globals = new CudaArray(cu, force.getNumGlobalParameters(), "customNonbondedGlobals", false, CL_MEM_READ_ONLY);
// vector > paramVector(numParticles);
// vector > exclusionList(numParticles);
// for (int i = 0; i < numParticles; i++) {
// vector parameters;
// force.getParticleParameters(i, parameters);
// paramVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// paramVector[i][j] = (cl_float) parameters[j];
// exclusionList[i].push_back(i);
// }
// for (int i = 0; i < force.getNumExclusions(); i++) {
// int particle1, particle2;
// force.getExclusionParticles(i, particle1, particle2);
// exclusionList[particle1].push_back(particle2);
// exclusionList[particle2].push_back(particle1);
// }
// params->setParameterValues(paramVector);
//
// // Record the tabulated functions.
//
// CudaExpressionUtilities::FunctionPlaceholder fp;
// map functions;
// vector > functionDefinitions;
// vector tabulatedFunctionParamsVec(force.getNumFunctions());
// for (int i = 0; i < force.getNumFunctions(); i++) {
// string name;
// vector values;
// double min, max;
// force.getFunctionParameters(i, name, values, min, max);
// string arrayName = prefix+"table"+cu.intToString(i);
// functionDefinitions.push_back(make_pair(name, arrayName));
// functions[name] = &fp;
// tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
// vector f = cu.getExpressionUtilities().computeFunctionCoefficients(values, min, max);
// tabulatedFunctions.push_back(new CudaArray(cu, values.size()-1, "TabulatedFunction"));
// tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
// }
// if (force.getNumFunctions() > 0) {
// tabulatedFunctionParams = new CudaArray(cu, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
// tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDevicePointer()));
// }
//
// // Record information for the expressions.
//
// globalParamNames.resize(force.getNumGlobalParameters());
// globalParamValues.resize(force.getNumGlobalParameters());
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// globalParamNames[i] = force.getGlobalParameterName(i);
// globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
// }
// if (globals != NULL)
// globals->upload(globalParamValues);
// bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
// Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
// Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
// map forceExpressions;
// forceExpressions["tempEnergy += "] = energyExpression;
// forceExpressions["tempForce -= "] = forceExpression;
//
// // Create the kernels.
//
// vector > variables;
// ExpressionTreeNode rnode(new Operation::Variable("r"));
// variables.push_back(make_pair(rnode, "r"));
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
// variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
// for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
// const string& name = force.getPerParticleParameterName(i);
// variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
// variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
// }
// for (int i = 0; i < force.getNumGlobalParameters(); i++) {
// const string& name = force.getGlobalParameterName(i);
// string value = "globals["+cu.intToString(i)+"]";
// variables.push_back(makeVariable(name, prefix+value));
// }
// stringstream compute;
// compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionDefinitions, prefix+"temp", prefix+"functionParams");
// map replacements;
// replacements["COMPUTE_FORCE"] = compute.str();
// string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements);
// cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
// for (int i = 0; i < (int) params->getBuffers().size(); i++) {
// const CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
// cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
// }
// if (globals != NULL) {
// globals->upload(globalParamValues);
// cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDevicePointer()));
// }
// cu.addForce(new CudaCustomNonbondedForceInfo(cu.getNonbondedUtilities().getNumForceBuffers(), force));
//}
//
//double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// if (globals != NULL) {
// bool changed = false;
// for (int i = 0; i < (int) globalParamNames.size(); i++) {
// cl_float value = (cl_float) context.getParameter(globalParamNames[i]);
// if (value != globalParamValues[i])
// changed = true;
// globalParamValues[i] = value;
// }
// if (changed)
// globals->upload(globalParamValues);
// }
// return 0.0;
//}
//
//void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
// cuCtxSetCurrent(cu.getContext());
// int numParticles = force.getNumParticles();
// if (numParticles != cu.getNumAtoms())
// throw OpenMMException("updateParametersInContext: The number of particles has changed");
//
// // Record the per-particle parameters.
//
// vector > paramVector(numParticles);
// vector parameters;
// for (int i = 0; i < numParticles; i++) {
// force.getParticleParameters(i, parameters);
// paramVector[i].resize(parameters.size());
// for (int j = 0; j < (int) parameters.size(); j++)
// paramVector[i][j] = (cl_float) parameters[j];
// }
// params->setParameterValues(paramVector);
//
// // Mark that the current reordering may be invalid.
//
// cu.invalidateMolecules();
//}
//
//class CudaGBSAOBCForceInfo : public CudaForceInfo {
//public:
// CudaGBSAOBCForceInfo(int requiredBuffers, const GBSAOBCForce& force) : CudaForceInfo(requiredBuffers), force(force) {
// }
// bool areParticlesIdentical(int particle1, int particle2) {
// double charge1, charge2, radius1, radius2, scale1, scale2;
// force.getParticleParameters(particle1, charge1, radius1, scale1);
// force.getParticleParameters(particle2, charge2, radius2, scale2);
// return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
// }
//private:
// const GBSAOBCForce& force;
//};
//
//CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
// if (params != NULL)
// delete params;
// if (bornSum != NULL)
// delete bornSum;
// if (longBornSum != NULL)
// delete longBornSum;
// if (bornRadii != NULL)
// delete bornRadii;
// if (bornForce != NULL)
// delete bornForce;
// if (longBornForce != NULL)
// delete longBornForce;
// if (obcChain != NULL)
// delete obcChain;
//}
//
//void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
// if (cu.getPlatformData().contexts.size() > 1)
// throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// params = new CudaArray(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
// bornRadii = new CudaArray(cu, cu.getPaddedNumAtoms(), "bornRadii");
// obcChain = new CudaArray(cu, cu.getPaddedNumAtoms(), "obcChain");
// if (cu.getSupports64BitGlobalAtomics()) {
// longBornSum = new CudaArray(cu, cu.getPaddedNumAtoms(), "longBornSum");
// longBornForce = new CudaArray(cu, cu.getPaddedNumAtoms(), "longBornForce");
// bornForce = new CudaArray(cu, cu.getPaddedNumAtoms(), "bornForce");
// cu.addAutoclearBuffer(longBornSum->getDevicePointer(), 2*longBornSum->getSize());
// cu.addAutoclearBuffer(longBornForce->getDevicePointer(), 2*longBornForce->getSize());
// }
// else {
// bornSum = new CudaArray(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
// bornForce = new CudaArray(cu, cu.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
// cu.addAutoclearBuffer(bornSum->getDevicePointer(), bornSum->getSize());
// cu.addAutoclearBuffer(bornForce->getDevicePointer(), bornForce->getSize());
// }
// CudaArray& posq = cu.getPosq();
// int numParticles = force.getNumParticles();
// vector paramsVector(numParticles);
// const double dielectricOffset = 0.009;
// for (int i = 0; i < numParticles; i++) {
// double charge, radius, scalingFactor;
// force.getParticleParameters(i, charge, radius, scalingFactor);
// radius -= dielectricOffset;
// paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
// posq[i].w = (float) charge;
// }
// posq.upload();
// params->upload(paramsVector);
// prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
// bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
// bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
// string source = CudaKernelSources::gbsaObc2;
// nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector >(), source, force.getForceGroup());
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDevicePointer()));;
// nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "float", 1, sizeof(cl_float), bornForce->getDevicePointer()));;
// cu.addForce(new CudaGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
//}
//
//double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// bool deviceIsCpu = (cu.getDevice().getInfo() == CL_DEVICE_TYPE_CPU);
// if (!hasCreatedKernels) {
// // These Kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized yet then.
//
// hasCreatedKernels = true;
// maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
// map defines;
// if (nb.getForceBufferPerAtomBlock())
// defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
// if (nb.getUseCutoff())
// defines["USE_CUTOFF"] = "1";
// if (nb.getUsePeriodic())
// defines["USE_PERIODIC"] = "1";
// defines["CUTOFF_SQUARED"] = cu.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
// defines["PREFACTOR"] = cu.doubleToString(prefactor);
// defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
// defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
// defines["FORCE_WORK_GROUP_SIZE"] = cu.intToString(nb.getForceThreadBlockSize());
// string platformVendor = cu::Platform(cu.getDevice().getInfo()).getInfo();
// if (platformVendor == "Apple")
// defines["USE_APPLE_WORKAROUND"] = "1";
// string file;
// if (deviceIsCpu)
// file = CudaKernelSources::gbsaObc_cpu;
// else if (cu.getSIMDWidth() == 32)
// file = CudaKernelSources::gbsaObc_nvidia;
// else
// file = CudaKernelSources::gbsaObc_default;
// cu::Program program = cu.createProgram(file, defines);
// bool useLong = (cu.getSupports64BitGlobalAtomics() && !deviceIsCpu);
// int index = 0;
// computeBornSumKernel = cu::Kernel(program, "computeBornSum");
// computeBornSumKernel.setArg(index++, (useLong ? longBornSum->getDevicePointer() : bornSum->getDevicePointer()));
// computeBornSumKernel.setArg(index++, cu.getPosq().getDevicePointer());
// computeBornSumKernel.setArg(index++, params->getDevicePointer());
// if (nb.getUseCutoff()) {
// computeBornSumKernel.setArg(index++, nb.getInteractingTiles().getDevicePointer());
// computeBornSumKernel.setArg(index++, nb.getInteractionCount().getDevicePointer());
// index += 2; // The periodic box size arguments are set when the kernel is executed.
// computeBornSumKernel.setArg(index++, maxTiles);
// if (cu.getSIMDWidth() == 32 || deviceIsCpu)
// computeBornSumKernel.setArg(index++, nb.getInteractionFlags().getDevicePointer());
// }
// else
// computeBornSumKernel.setArg(index++, cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
// if (cu.getSIMDWidth() == 32) {
// computeBornSumKernel.setArg(index++, nb.getExclusionIndices().getDevicePointer());
// computeBornSumKernel.setArg(index++, nb.getExclusionRowIndices().getDevicePointer());
// }
// force1Kernel = cu::Kernel(program, "computeGBSAForce1");
// index = 0;
// force1Kernel.setArg(index++, (useLong ? cu.getLongForceBuffer().getDevicePointer() : cu.getForceBuffers().getDevicePointer()));
// force1Kernel.setArg(index++, (useLong ? longBornForce->getDevicePointer() : bornForce->getDevicePointer()));
// force1Kernel.setArg(index++, cu.getEnergyBuffer().getDevicePointer());
// force1Kernel.setArg(index++, cu.getPosq().getDevicePointer());
// force1Kernel.setArg(index++, bornRadii->getDevicePointer());
// if (nb.getUseCutoff()) {
// force1Kernel.setArg(index++, nb.getInteractingTiles().getDevicePointer());
// force1Kernel.setArg(index++, nb.getInteractionCount().getDevicePointer());
// index += 2; // The periodic box size arguments are set when the kernel is executed.
// force1Kernel.setArg(index++, maxTiles);
// if (cu.getSIMDWidth() == 32 || deviceIsCpu)
// force1Kernel.setArg(index++, nb.getInteractionFlags().getDevicePointer());
// }
// else
// force1Kernel.setArg(index++, cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
// if (cu.getSIMDWidth() == 32) {
// force1Kernel.setArg(index++, nb.getExclusionIndices().getDevicePointer());
// force1Kernel.setArg(index++, nb.getExclusionRowIndices().getDevicePointer());
// }
// program = cu.createProgram(CudaKernelSources::gbsaObcReductions, defines);
// reduceBornSumKernel = cu::Kernel(program, "reduceBornSum");
// reduceBornSumKernel.setArg(0, cu.getPaddedNumAtoms());
// reduceBornSumKernel.setArg(1, nb.getNumForceBuffers());
// reduceBornSumKernel.setArg(2, 1.0f);
// reduceBornSumKernel.setArg