/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see . *
* -------------------------------------------------------------------------- */
#include "CudaKernels.h"
#include "CudaForceInfo.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/Context.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCentroidBondForceImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
#include "openmm/internal/CustomNonbondedForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/OSRngSeed.h"
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaIntegrationUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "CudaKernelSources.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include "jama_eig.h"
#include
#include
#include
#include
using namespace OpenMM;
using namespace std;
using namespace Lepton;
#define CHECK_RESULT(result, prefix) \
if (result != CUDA_SUCCESS) { \
std::stringstream m; \
m<(op).getValue() == 0.0);
}
static bool usesVariable(const Lepton::ExpressionTreeNode& node, const string& variable) {
const Lepton::Operation& op = node.getOperation();
if (op.getId() == Lepton::Operation::VARIABLE && op.getName() == variable)
return true;
for (auto& child : node.getChildren())
if (usesVariable(child, variable))
return true;
return false;
}
static bool usesVariable(const Lepton::ParsedExpression& expression, const string& variable) {
return usesVariable(expression.getRootNode(), variable);
}
static pair makeVariable(const string& name, const string& value) {
return make_pair(ExpressionTreeNode(new Operation::Variable(name)), value);
}
static void replaceFunctionsInExpression(map& functions, ExpressionProgram& expression) {
for (int i = 0; i < expression.getNumOperations(); i++) {
if (expression.getOperation(i).getId() == Operation::CUSTOM) {
const Operation::Custom& op = dynamic_cast(expression.getOperation(i));
expression.setOperation(i, new Operation::Custom(op.getName(), functions[op.getName()]->clone(), op.getDerivOrder()));
}
}
}
void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cu.setForcesValid(true);
cu.setAsCurrent();
cu.clearAutoclearBuffers();
for (auto computation : cu.getPreComputations())
computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
cu.setComputeForceCount(cu.getComputeForceCount()+1);
nb.prepareInteractions(groups);
map& derivs = cu.getEnergyParamDerivWorkspace();
for (auto& param : context.getParameters())
derivs[param.first] = 0;
}
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.getBondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0;
for (auto computation : cu.getPostComputations())
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
cu.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy)
sum += cu.reduceEnergy();
if (!cu.getForcesValid())
valid = false;
return sum;
}
void CudaUpdateStateDataKernel::initialize(const System& system) {
}
double CudaUpdateStateDataKernel::getTime(const ContextImpl& context) const {
return cu.getTime();
}
void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
vector& contexts = cu.getPlatformData().contexts;
for (auto ctx : contexts)
ctx->setTime(time);
}
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector& positions) {
cu.setAsCurrent();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector posCorrection;
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
else if (cu.getUseMixedPrecision()) {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cu.getPosqCorrection().download(posCorrection);
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
cu.getPlatformData().threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const vector& order = cu.getAtomIndex();
int numParticles = cu.getNumAtoms();
Vec3 boxVectors[3];
cu.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
for (int i = start; i < end; ++i) {
double4 pos = posq[i];
int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else if (cu.getUseMixedPrecision()) {
float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = start; i < end; ++i) {
float4 pos1 = posq[i];
float4 pos2 = posCorrection[i];
int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3((double)pos1.x+(double)pos2.x, (double)pos1.y+(double)pos2.y, (double)pos1.z+(double)pos2.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
for (int i = start; i < end; ++i) {
float4 pos = posq[i];
int4 offset = cu.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
});
cu.getPlatformData().threads.waitForThreads();
}
void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const vector& positions) {
cu.setAsCurrent();
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
double4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = p[0];
pos.y = p[1];
pos.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
posq[i] = make_double4(0.0, 0.0, 0.0, 0.0);
cu.getPosq().upload(posq);
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
float4& pos = posq[i];
const Vec3& p = positions[order[i]];
pos.x = (float) p[0];
pos.y = (float) p[1];
pos.z = (float) p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
posq[i] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
cu.getPosq().upload(posq);
}
if (cu.getUseMixedPrecision()) {
float4* posCorrection = (float4*) cu.getPinnedBuffer();
for (int i = 0; i < numParticles; ++i) {
float4& c = posCorrection[i];
const Vec3& p = positions[order[i]];
c.x = (float) (p[0]-(float)p[0]);
c.y = (float) (p[1]-(float)p[1]);
c.z = (float) (p[2]-(float)p[2]);
c.w = 0;
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
posCorrection[i] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
cu.getPosqCorrection().upload(posCorrection);
}
for (auto& offset : cu.getPosCellOffsets())
offset = make_int4(0, 0, 0, 0);
cu.reorderAtoms();
}
void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, vector& velocities) {
cu.setAsCurrent();
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles);
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double4* velm = (double4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
double4 vel = velm[i];
int4 offset = cu.getPosCellOffsets()[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
else {
float4* velm = (float4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
float4 vel = velm[i];
int4 offset = cu.getPosCellOffsets()[i];
velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
}
}
}
void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector& velocities) {
cu.setAsCurrent();
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double4* velm = (double4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
double4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
velm[i] = make_double4(0.0, 0.0, 0.0, 0.0);
cu.getVelm().upload(velm);
}
else {
float4* velm = (float4*) cu.getPinnedBuffer();
cu.getVelm().download(velm);
for (int i = 0; i < numParticles; ++i) {
float4& vel = velm[i];
const Vec3& p = velocities[order[i]];
vel.x = p[0];
vel.y = p[1];
vel.z = p[2];
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
velm[i] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
cu.getVelm().upload(velm);
}
}
void CudaUpdateStateDataKernel::getForces(ContextImpl& context, vector& forces) {
cu.setAsCurrent();
long long* force = (long long*) cu.getPinnedBuffer();
cu.getForce().download(force);
const vector& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
int paddedNumParticles = cu.getPaddedNumAtoms();
forces.resize(numParticles);
double scale = 1.0/(double) 0x100000000LL;
for (int i = 0; i < numParticles; ++i)
forces[order[i]] = Vec3(scale*force[i], scale*force[i+paddedNumParticles], scale*force[i+paddedNumParticles*2]);
}
void CudaUpdateStateDataKernel::getEnergyParameterDerivatives(ContextImpl& context, map& derivs) {
const vector& paramDerivNames = cu.getEnergyParamDerivNames();
int numDerivs = paramDerivNames.size();
if (numDerivs == 0)
return;
derivs = cu.getEnergyParamDerivWorkspace();
CudaArray& derivArray = cu.getEnergyParamDerivBuffer();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
vector derivBuffers;
derivArray.download(derivBuffers);
for (int i = numDerivs; i < derivArray.getSize(); i += numDerivs)
for (int j = 0; j < numDerivs; j++)
derivBuffers[j] += derivBuffers[i+j];
for (int i = 0; i < numDerivs; i++)
derivs[paramDerivNames[i]] += derivBuffers[i];
}
else {
vector derivBuffers;
derivArray.download(derivBuffers);
for (int i = numDerivs; i < derivArray.getSize(); i += numDerivs)
for (int j = 0; j < numDerivs; j++)
derivBuffers[j] += derivBuffers[i+j];
for (int i = 0; i < numDerivs; i++)
derivs[paramDerivNames[i]] += derivBuffers[i];
}
}
void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
cu.getPeriodicBoxVectors(a, b, c);
}
void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) {
vector& contexts = cu.getPlatformData().contexts;
// If any particles have been wrapped to the first periodic box, we need to unwrap them
// to avoid changing their positions.
vector positions;
for (auto& offset : cu.getPosCellOffsets()) {
if (offset.x != 0 || offset.y != 0 || offset.z != 0) {
getPositions(context, positions);
break;
}
}
// Update the vectors.
for (auto ctx : contexts)
ctx->setPeriodicBoxVectors(a, b, c);
if (positions.size() > 0)
setPositions(context, positions);
}
void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cu.setAsCurrent();
int version = 2;
stream.write((char*) &version, sizeof(int));
int precision = (cu.getUseDoublePrecision() ? 2 : cu.getUseMixedPrecision() ? 1 : 0);
stream.write((char*) &precision, sizeof(int));
double time = cu.getTime();
stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int));
int stepsSinceReorder = cu.getStepsSinceReorder();
stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cu.getPinnedBuffer();
cu.getPosq().download(buffer);
stream.write(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize());
if (cu.getUseMixedPrecision()) {
cu.getPosqCorrection().download(buffer);
stream.write(buffer, cu.getPosqCorrection().getSize()*cu.getPosqCorrection().getElementSize());
}
cu.getVelm().download(buffer);
stream.write(buffer, cu.getVelm().getSize()*cu.getVelm().getElementSize());
stream.write((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
stream.write((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());
Vec3 boxVectors[3];
cu.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
stream.write((char*) boxVectors, 3*sizeof(Vec3));
cu.getIntegrationUtilities().createCheckpoint(stream);
SimTKOpenMMUtilities::createCheckpoint(stream);
}
void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cu.setAsCurrent();
int version;
stream.read((char*) &version, sizeof(int));
if (version != 2)
throw OpenMMException("Checkpoint was created with a different version of OpenMM");
int precision;
stream.read((char*) &precision, sizeof(int));
int expectedPrecision = (cu.getUseDoublePrecision() ? 2 : cu.getUseMixedPrecision() ? 1 : 0);
if (precision != expectedPrecision)
throw OpenMMException("Checkpoint was created with a different numeric precision");
double time;
stream.read((char*) &time, sizeof(double));
int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int));
stream.read((char*) &stepsSinceReorder, sizeof(int));
vector& contexts = cu.getPlatformData().contexts;
for (auto ctx : contexts) {
ctx->setTime(time);
ctx->setStepCount(stepCount);
ctx->setStepsSinceReorder(stepsSinceReorder);
}
char* buffer = (char*) cu.getPinnedBuffer();
stream.read(buffer, cu.getPosq().getSize()*cu.getPosq().getElementSize());
cu.getPosq().upload(buffer);
if (cu.getUseMixedPrecision()) {
stream.read(buffer, cu.getPosqCorrection().getSize()*cu.getPosqCorrection().getElementSize());
cu.getPosqCorrection().upload(buffer);
}
stream.read(buffer, cu.getVelm().getSize()*cu.getVelm().getElementSize());
cu.getVelm().upload(buffer);
stream.read((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
cu.getAtomIndexArray().upload(cu.getAtomIndex());
stream.read((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());
Vec3 boxVectors[3];
stream.read((char*) &boxVectors, 3*sizeof(Vec3));
for (auto ctx : contexts)
ctx->setPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
cu.getIntegrationUtilities().loadCheckpoint(stream);
SimTKOpenMMUtilities::loadCheckpoint(stream);
for (auto listener : cu.getReorderListeners())
listener->execute();
}
void CudaApplyConstraintsKernel::initialize(const System& system) {
}
void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
cu.setAsCurrent();
if (!hasInitializedKernel) {
hasInitializedKernel = true;
map defines;
CUmodule module = cu.createModule(CudaKernelSources::constraints, defines);
applyDeltasKernel = cu.getKernel(module, "applyPositionDeltas");
}
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
cu.clearBuffer(integration.getPosDelta());
integration.applyConstraints(tol);
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
int numAtoms = cu.getNumAtoms();
void* args[] = {&numAtoms, &cu.getPosq().getDevicePointer(), &posCorrection, &cu.getIntegrationUtilities().getPosDelta().getDevicePointer()};
cu.executeKernel(applyDeltasKernel, args, cu.getNumAtoms());
integration.computeVirtualSites();
}
void CudaApplyConstraintsKernel::applyToVelocities(ContextImpl& context, double tol) {
cu.getIntegrationUtilities().applyVelocityConstraints(tol);
}
void CudaVirtualSitesKernel::initialize(const System& system) {
}
void CudaVirtualSitesKernel::computePositions(ContextImpl& context) {
cu.getIntegrationUtilities().computeVirtualSites();
}
class CudaCalcHarmonicBondForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const HarmonicBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double length, k;
force.getBondParameters(index, particle1, particle2, length, k);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double length1, length2, k1, k2;
force.getBondParameters(group1, particle1, particle2, length1, k1);
force.getBondParameters(group2, particle1, particle2, length2, k2);
return (length1 == length2 && k1 == k2);
}
private:
const HarmonicBondForce& force;
};
void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
vector > atoms(numBonds, vector(2));
params.initialize(cu, numBonds, "bondParams");
vector paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
double length, k;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params.upload(paramVector);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
if (numBonds != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
if (numBonds == 0)
return;
// Record the per-bond parameters.
vector paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
int atom1, atom2;
double length, k;
force.getBondParameters(startIndex+i, atom1, atom2, length, k);
paramVector[i] = make_float2((float) length, (float) k);
}
params.upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcCustomBondForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const CustomBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
vector parameters;
force.getBondParameters(index, particle1, particle2, parameters);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
vector parameters1, parameters2;
force.getBondParameters(group1, particle1, particle2, parameters1);
force.getBondParameters(group2, particle1, particle2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomBondForce& force;
};
CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
numBonds = endIndex-startIndex;
if (numBonds == 0)
return;
vector > atoms(numBonds, vector(2));
params = new CudaParameterSet(cu, force.getNumPerBondParameters(), numBonds, "customBondParams");
vector > paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
vector parameters;
force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
info = new ForceInfo(force);
cu.addForce(info);
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdR = "] = forceExpression;
// Create the kernels.
map variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals.initialize(cu, force.getNumGlobalParameters(), "customBondGlobals");
globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string paramName = force.getEnergyParameterDerivativeName(i);
string derivVariable = cu.getBondedUtilities().addEnergyParameterDerivative(paramName);
Lepton::ParsedExpression derivExpression = energyExpression.differentiate(paramName).optimize();
expressions[derivVariable+" += "] = derivExpression;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< functions;
vector > functionNames;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp");
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
}
double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals.upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumBonds()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumBonds()/numContexts;
if (numBonds != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
if (numBonds == 0)
return;
// Record the per-bond parameters.
vector > paramVector(numBonds);
vector parameters;
for (int i = 0; i < numBonds; i++) {
int atom1, atom2;
force.getBondParameters(startIndex+i, atom1, atom2, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcHarmonicAngleForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const HarmonicAngleForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3;
double angle, k;
force.getAngleParameters(index, particle1, particle2, particle3, angle, k);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
double angle1, angle2, k1, k2;
force.getAngleParameters(group1, particle1, particle2, particle3, angle1, k1);
force.getAngleParameters(group2, particle1, particle2, particle3, angle2, k2);
return (angle1 == angle2 && k1 == k2);
}
private:
const HarmonicAngleForce& force;
};
void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
vector > atoms(numAngles, vector(3));
params.initialize(cu, numAngles, "angleParams");
vector paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
double angle, k;
force.getAngleParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
}
params.upload(paramVector);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
if (numAngles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of angles has changed");
if (numAngles == 0)
return;
// Record the per-angle parameters.
vector paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
int atom1, atom2, atom3;
double angle, k;
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
paramVector[i] = make_float2((float) angle, (float) k);
}
params.upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcCustomAngleForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const CustomAngleForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3;
vector parameters;
force.getAngleParameters(index, particle1, particle2, particle3, parameters);
particles.resize(3);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3;
vector parameters1, parameters2;
force.getAngleParameters(group1, particle1, particle2, particle3, parameters1);
force.getAngleParameters(group2, particle1, particle2, particle3, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomAngleForce& force;
};
CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
}
void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
numAngles = endIndex-startIndex;
if (numAngles == 0)
return;
vector > atoms(numAngles, vector(3));
params = new CudaParameterSet(cu, force.getNumPerAngleParameters(), numAngles, "customAngleParams");
vector > paramVector(numAngles);
for (int i = 0; i < numAngles; i++) {
vector parameters;
force.getAngleParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
info = new ForceInfo(force);
cu.addForce(info);
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerAngleParameters(); i++) {
const string& name = force.getPerAngleParameterName(i);
variables[name] = "angleParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals.initialize(cu, force.getNumGlobalParameters(), "customAngleGlobals");
globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string paramName = force.getEnergyParameterDerivativeName(i);
string derivVariable = cu.getBondedUtilities().addEnergyParameterDerivative(paramName);
Lepton::ParsedExpression derivExpression = energyExpression.differentiate(paramName).optimize();
expressions[derivVariable+" += "] = derivExpression;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< functions;
vector > functionNames;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp");
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
}
double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals.upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumAngles()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumAngles()/numContexts;
if (numAngles != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of angles has changed");
if (numAngles == 0)
return;
// Record the per-angle parameters.
vector > paramVector(numAngles);
vector parameters;
for (int i = 0; i < numAngles; i++) {
int atom1, atom2, atom3;
force.getAngleParameters(startIndex+i, atom1, atom2, atom3, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcPeriodicTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const PeriodicTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4, periodicity;
double phase, k;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, periodicity, phase, k);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4, periodicity1, periodicity2;
double phase1, phase2, k1, k2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, periodicity1, phase1, k1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, periodicity2, phase2, k2);
return (periodicity1 == periodicity2 && phase1 == phase2 && k1 == k2);
}
private:
const PeriodicTorsionForce& force;
};
void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params.initialize(cu, numTorsions, "periodicTorsionParams");
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params.upload(paramVector);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
if (numTorsions == 0)
return;
// Record the per-torsion parameters.
vector paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4, periodicity;
double phase, k;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
}
params.upload(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcRBTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const RBTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, c0, c1, c2, c3, c4, c5);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
double c0a, c0b, c1a, c1b, c2a, c2b, c3a, c3b, c4a, c4b, c5a, c5b;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, c0a, c1a, c2a, c3a, c4a, c5a);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, c0b, c1b, c2b, c3b, c4b, c5b);
return (c0a == c0b && c1a == c1b && c2a == c2b && c3a == c3b && c4a == c4b && c5a == c5b);
}
private:
const RBTorsionForce& force;
};
void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params1.initialize(cu, numTorsions, "rbTorsionParams1");
params2.initialize(cu, numTorsions, "rbTorsionParams2");
vector paramVector1(numTorsions);
vector paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], c0, c1, c2, c3, c4, c5);
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1.upload(paramVector1);
params2.upload(paramVector2);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1.getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2.getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
if (numTorsions == 0)
return;
// Record the per-torsion parameters.
vector paramVector1(numTorsions);
vector paramVector2(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4;
double c0, c1, c2, c3, c4, c5;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, c0, c1, c2, c3, c4, c5);
paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
paramVector2[i] = make_float2((float) c4, (float) c5);
}
params1.upload(paramVector1);
params2.upload(paramVector2);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcCMAPTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const CMAPTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int map, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(index, map, a1, a2, a3, a4, b1, b2, b3, b4);
particles.resize(8);
particles[0] = a1;
particles[1] = a2;
particles[2] = a3;
particles[3] = a4;
particles[4] = b1;
particles[5] = b2;
particles[6] = b3;
particles[7] = b4;
}
bool areGroupsIdentical(int group1, int group2) {
int map1, map2, a1, a2, a3, a4, b1, b2, b3, b4;
force.getTorsionParameters(group1, map1, a1, a2, a3, a4, b1, b2, b3, b4);
force.getTorsionParameters(group2, map2, a1, a2, a3, a4, b1, b2, b3, b4);
return (map1 == map2);
}
private:
const CMAPTorsionForce& force;
};
void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
int numMaps = force.getNumMaps();
vector coeffVec;
mapPositionsVec.resize(numMaps);
vector energy;
vector > c;
int currentPosition = 0;
for (int i = 0; i < numMaps; i++) {
int size;
force.getMapParameters(i, size, energy);
CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
mapPositionsVec[i] = make_int2(currentPosition, size);
currentPosition += 4*size*size;
for (int j = 0; j < size*size; j++) {
coeffVec.push_back(make_float4((float) c[j][0], (float) c[j][1], (float) c[j][2], (float) c[j][3]));
coeffVec.push_back(make_float4((float) c[j][4], (float) c[j][5], (float) c[j][6], (float) c[j][7]));
coeffVec.push_back(make_float4((float) c[j][8], (float) c[j][9], (float) c[j][10], (float) c[j][11]));
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
vector > atoms(numTorsions, vector(8));
vector torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients.initialize(cu, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions.initialize(cu, numMaps, "cmapTorsionMapPositions");
torsionMaps.initialize(cu, numTorsions, "cmapTorsionMaps");
coefficients.upload(coeffVec);
mapPositions.upload(mapPositionsVec);
torsionMaps.upload(torsionMapsVec);
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients.getDevicePointer(), "float4");
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions.getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps.getDevicePointer(), "int");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
return 0.0;
}
void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force) {
int numMaps = force.getNumMaps();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (mapPositions.getSize() != numMaps)
throw OpenMMException("updateParametersInContext: The number of maps has changed");
if (torsionMaps.getSize() != numTorsions)
throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");
// Update the maps.
vector coeffVec;
vector energy;
vector > c;
int currentPosition = 0;
for (int i = 0; i < numMaps; i++) {
int size;
force.getMapParameters(i, size, energy);
if (size != mapPositionsVec[i].y)
throw OpenMMException("updateParametersInContext: The size of a map has changed");
CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
currentPosition += 4*size*size;
for (int j = 0; j < size*size; j++) {
coeffVec.push_back(make_float4((float) c[j][0], (float) c[j][1], (float) c[j][2], (float) c[j][3]));
coeffVec.push_back(make_float4((float) c[j][4], (float) c[j][5], (float) c[j][6], (float) c[j][7]));
coeffVec.push_back(make_float4((float) c[j][8], (float) c[j][9], (float) c[j][10], (float) c[j][11]));
coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
}
}
coefficients.upload(coeffVec);
// Update the indices.
vector torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++) {
int index[8];
force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
}
torsionMaps.upload(torsionMapsVec);
}
class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const CustomTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2, particle3, particle4;
vector parameters;
force.getTorsionParameters(index, particle1, particle2, particle3, particle4, parameters);
particles.resize(4);
particles[0] = particle1;
particles[1] = particle2;
particles[2] = particle3;
particles[3] = particle4;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2, particle3, particle4;
vector parameters1, parameters2;
force.getTorsionParameters(group1, particle1, particle2, particle3, particle4, parameters1);
force.getTorsionParameters(group2, particle1, particle2, particle3, particle4, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
return true;
}
private:
const CustomTorsionForce& force;
};
CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
if (params != NULL)
delete params;
}
void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
numTorsions = endIndex-startIndex;
if (numTorsions == 0)
return;
vector > atoms(numTorsions, vector(4));
params = new CudaParameterSet(cu, force.getNumPerTorsionParameters(), numTorsions, "customTorsionParams");
vector > paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) {
vector parameters;
force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
info = new ForceInfo(force);
cu.addForce(info);
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("theta").optimize();
map expressions;
expressions["energy += "] = energyExpression;
expressions["float dEdAngle = "] = forceExpression;
// Create the kernels.
map variables;
variables["theta"] = "theta";
for (int i = 0; i < force.getNumPerTorsionParameters(); i++) {
const string& name = force.getPerTorsionParameterName(i);
variables[name] = "torsionParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals.initialize(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
globals.upload(globalParamValues);
string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = argName+"["+cu.intToString(i)+"]";
variables[name] = value;
}
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string paramName = force.getEnergyParameterDerivativeName(i);
string derivVariable = cu.getBondedUtilities().addEnergyParameterDerivative(paramName);
Lepton::ParsedExpression derivExpression = energyExpression.differentiate(paramName).optimize();
expressions[derivVariable+" += "] = derivExpression;
}
stringstream compute;
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string argName = cu.getBondedUtilities().addArgument(buffer.getMemory(), buffer.getType());
compute< functions;
vector > functionNames;
compute << cu.getExpressionUtilities().createExpressions(expressions, variables, functions, functionNames, "temp");
map replacements;
replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
replacements["COMPUTE_FORCE"] = compute.str();
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
}
double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals.upload(globalParamValues);
}
return 0.0;
}
void CudaCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) {
cu.setAsCurrent();
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
if (numTorsions != endIndex-startIndex)
throw OpenMMException("updateParametersInContext: The number of torsions has changed");
if (numTorsions == 0)
return;
// Record the per-torsion parameters.
vector > paramVector(numTorsions);
vector parameters;
for (int i = 0; i < numTorsions; i++) {
int atom1, atom2, atom3, atom4;
force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcNonbondedForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const NonbondedForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
force.getParticleParameters(particle1, charge1, sigma1, epsilon1);
force.getParticleParameters(particle2, charge2, sigma2, epsilon2);
return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
int getNumParticleGroups() {
return force.getNumExceptions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
int particle1, particle2;
double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2);
return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2);
}
private:
const NonbondedForce& force;
};
class CudaCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public:
PmeIO(CudaContext& cu, CUfunction addForcesKernel) : cu(cu), addForcesKernel(addForcesKernel) {
forceTemp.initialize(cu, cu.getNumAtoms(), "PmeForce");
}
float* getPosq() {
cu.setAsCurrent();
cu.getPosq().download(posq);
return (float*) &posq[0];
}
void setForce(float* force) {
forceTemp.upload(force);
void* args[] = {&forceTemp.getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(addForcesKernel, args, cu.getNumAtoms());
}
private:
CudaContext& cu;
vector posq;
CudaArray forceTemp;
CUfunction addForcesKernel;
};
class CudaCalcNonbondedForceKernel::PmePreComputation : public CudaContext::ForcePreComputation {
public:
PmePreComputation(CudaContext& cu, Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : cu(cu), pme(pme), io(io) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
Vec3 boxVectors[3] = {Vec3(cu.getPeriodicBoxSize().x, 0, 0), Vec3(0, cu.getPeriodicBoxSize().y, 0), Vec3(0, 0, cu.getPeriodicBoxSize().z)};
pme.getAs().beginComputation(io, boxVectors, includeEnergy);
}
private:
CudaContext& cu;
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class CudaCalcNonbondedForceKernel::PmePostComputation : public CudaContext::ForcePostComputation {
public:
PmePostComputation(Kernel& pme, CalcPmeReciprocalForceKernel::IO& io) : pme(pme), io(io) {
}
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
return pme.getAs().finishComputation(io);
}
private:
Kernel pme;
CalcPmeReciprocalForceKernel::IO& io;
};
class CudaCalcNonbondedForceKernel::SyncStreamPreComputation : public CudaContext::ForcePreComputation {
public:
SyncStreamPreComputation(CudaContext& cu, CUstream stream, CUevent event, int forceGroup) : cu(cu), stream(stream), event(event), forceGroup(forceGroup) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1< exceptionsWithOffsets;
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
exceptionsWithOffsets.insert(exception);
}
vector > exclusions;
vector exceptions;
map exceptionIndex;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0 || exceptionsWithOffsets.find(i) != exceptionsWithOffsets.end()) {
exceptionIndex[i] = exceptions.size();
exceptions.push_back(i);
}
}
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
vector baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
vector > exclusionList(numParticles);
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
exclusionList[i].push_back(i);
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
hasLJ = true;
}
for (auto exclusion : exclusions) {
exclusionList[exclusion.first].push_back(exclusion.second);
exclusionList[exclusion.second].push_back(exclusion.first);
}
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME && hasLJ);
usePosqCharges = hasCoulomb ? cu.requestPosqCharges() : false;
map defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
defines["USE_LJ_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
if (useCutoff) {
// Compute the reaction field constants.
double reactionFieldK = pow(force.getCutoffDistance(), -3.0)*(force.getReactionFieldDielectric()-1.0)/(2.0*force.getReactionFieldDielectric()+1.0);
double reactionFieldC = (1.0 / force.getCutoffDistance())*(3.0*force.getReactionFieldDielectric())/(2.0*force.getReactionFieldDielectric()+1.0);
defines["REACTION_FIELD_K"] = cu.doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = cu.doubleToString(reactionFieldC);
// Compute the switching coefficients.
if (force.getUseSwitchingFunction()) {
defines["LJ_SWITCH_CUTOFF"] = cu.doubleToString(force.getSwitchingDistance());
defines["LJ_SWITCH_C3"] = cu.doubleToString(10/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 3.0));
defines["LJ_SWITCH_C4"] = cu.doubleToString(15/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 4.0));
defines["LJ_SWITCH_C5"] = cu.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && !doLJPME)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
map paramsDefines;
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
if (cu.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
// Create the reciprocal space kernels.
map replacements;
replacements["NUM_ATOMS"] = cu.intToString(numParticles);
replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
replacements["KMAX_X"] = cu.intToString(kmaxx);
replacements["KMAX_Y"] = cu.intToString(kmaxy);
replacements["KMAX_Z"] = cu.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
replacements["M_PI"] = cu.doubleToString(M_PI);
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements);
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
cosSinSums.initialize(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (((nonbondedMethod == PME || nonbondedMethod == LJPME) && hasCoulomb) || doLJPME) {
// Compute the PME parameters.
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
if (doLJPME) {
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = CudaFFT3D::findLegalDimension(dispersionGridSizeX);
dispersionGridSizeY = CudaFFT3D::findLegalDimension(dispersionGridSizeY);
dispersionGridSizeZ = CudaFFT3D::findLegalDimension(dispersionGridSizeZ);
}
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
if (cu.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME) {
paramsDefines["INCLUDE_LJPME"] = "1";
paramsDefines["LJPME_SELF_ENERGY_SCALE"] = cu.doubleToString(pow(dispersionAlpha, 6)/3.0);
for (int i = 0; i < numParticles; i++)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
usePmeStream = (!cu.getPlatformData().disablePmeStream && string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
map pmeDefines;
pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["M_PI"] = cu.doubleToString(M_PI);
if (cu.getUseDoublePrecision())
pmeDefines["USE_DOUBLE_PRECISION"] = "1";
if (usePmeStream)
pmeDefines["USE_PME_STREAM"] = "1";
if (cu.getPlatformData().deterministicForces)
pmeDefines["USE_DETERMINISTIC_FORCES"] = "1";
map replacements;
replacements["CHARGE"] = (usePosqCharges ? "pos.w" : "charges[atom]");
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::pme, replacements), pmeDefines);
if (cu.getPlatformData().useCpuPme && !doLJPME && usePosqCharges) {
// Create the CPU PME kernel.
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
cpuPme.getAs().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha, cu.getPlatformData().deterministicForces);
CUfunction addForcesKernel = cu.getKernel(module, "addForces");
pmeio = new PmeIO(cu, addForcesKernel);
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
}
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
}
}
if (pmeio == NULL) {
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
pmeEvalEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
if (doLJPME) {
pmeDefines["EWALD_ALPHA"] = cu.doubleToString(dispersionAlpha);
pmeDefines["GRID_SIZE_X"] = cu.intToString(dispersionGridSizeX);
pmeDefines["GRID_SIZE_Y"] = cu.intToString(dispersionGridSizeY);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(dispersionGridSizeZ);
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1";
pmeDefines["CHARGE_FROM_SIGEPS"] = "1";
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeDispersionSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeDispersionConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeEvalDispersionEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
pmeInterpolateDispersionForceKernel = cu.getKernel(module, "gridInterpolateForce");
cuFuncSetCacheConfig(pmeDispersionSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
}
// Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
int roundedZSize = PmeOrder*(int) ceil(gridSizeZ/(double) PmeOrder);
int gridElements = gridSizeX*gridSizeY*roundedZSize;
if (doLJPME) {
roundedZSize = PmeOrder*(int) ceil(dispersionGridSizeZ/(double) PmeOrder);
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*roundedZSize);
}
pmeGrid1.initialize(cu, gridElements, 2*elementSize, "pmeGrid1");
pmeGrid2.initialize(cu, gridElements, 2*elementSize, "pmeGrid2");
cu.addAutoclearBuffer(pmeGrid2);
pmeBsplineModuliX.initialize(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY.initialize(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ.initialize(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX.initialize(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY.initialize(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ.initialize(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeAtomGridIndex.initialize(cu, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer.initialize(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(pmeEnergyBuffer);
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
int cufftVersion;
cufftGetVersion(&cufftVersion);
useCudaFFT = (cufftVersion >= 7050); // There was a critical bug in version 7.0
if (useCudaFFT) {
cufftResult result = cufftPlan3d(&fftForward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
if (doLJPME) {
result = cufftPlan3d(&dispersionFftForward, dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing disperison FFT: "+cu.intToString(result));
result = cufftPlan3d(&dispersionFftBackward, dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing disperison FFT: "+cu.intToString(result));
}
}
else {
fft = new CudaFFT3D(cu, gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME)
dispersionFft = new CudaFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
}
// Prepare for doing PME on its own stream.
if (usePmeStream) {
cuStreamCreate(&pmeStream, CU_STREAM_NON_BLOCKING);
if (useCudaFFT) {
cufftSetStream(fftForward, pmeStream);
cufftSetStream(fftBackward, pmeStream);
if (doLJPME) {
cufftSetStream(dispersionFftForward, pmeStream);
cufftSetStream(dispersionFftBackward, pmeStream);
}
}
CHECK_RESULT(cuEventCreate(&pmeSyncEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for NonbondedForce");
CHECK_RESULT(cuEventCreate(¶msSyncEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for NonbondedForce");
int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), pmeEnergyBuffer, recipForceGroup));
}
hasInitializedFFT = true;
// Initialize the b-spline moduli.
for (int grid = 0; grid < 2; grid++) {
int xsize, ysize, zsize;
CudaArray *xmoduli, *ymoduli, *zmoduli;
if (grid == 0) {
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = &pmeBsplineModuliX;
ymoduli = &pmeBsplineModuliY;
zmoduli = &pmeBsplineModuliZ;
}
else {
if (!doLJPME)
continue;
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = &pmeDispersionBsplineModuliX;
ymoduli = &pmeDispersionBsplineModuliY;
zmoduli = &pmeDispersionBsplineModuliZ;
}
int maxSize = max(max(xsize, ysize), zsize);
vector data(PmeOrder);
vector ddata(PmeOrder);
vector bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for (int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = sc*sc+ss*ss;
}
for (int i = 0; i < ndata; i++)
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
if (dim == 0)
xmoduli->upload(moduli, true);
else if (dim == 1)
ymoduli->upload(moduli, true);
else
zmoduli->upload(moduli, true);
}
}
}
}
}
// Add code to subtract off the reciprocal part of excluded interactions.
if ((nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) && pmeio == NULL) {
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*force.getNumExceptions()/numContexts;
int endIndex = (cu.getContextIndex()+1)*force.getNumExceptions()/numContexts;
int numExclusions = endIndex-startIndex;
if (numExclusions > 0) {
paramsDefines["HAS_EXCLUSIONS"] = "1";
vector > atoms(numExclusions, vector(2));
exclusionAtoms.initialize(cu, numExclusions, "exclusionAtoms");
exclusionParams.initialize(cu, numExclusions, "exclusionParams");
vector exclusionAtomsVec(numExclusions);
for (int i = 0; i < numExclusions; i++) {
int j = i+startIndex;
exclusionAtomsVec[i] = make_int2(exclusions[j].first, exclusions[j].second);
atoms[i][0] = exclusions[j].first;
atoms[i][1] = exclusions[j].second;
}
exclusionAtoms.upload(exclusionAtomsVec);
map replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exclusionParams.getDevicePointer(), "float4");
replacements["EWALD_ALPHA"] = cu.doubleToString(alpha);
replacements["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
replacements["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
baseParticleParams.initialize(cu, cu.getPaddedNumAtoms(), "baseParticleParams");
baseParticleParams.upload(baseParticleParamVec);
map replacements;
if (usePosqCharges) {
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
}
else {
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
}
if (hasCoulomb)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
sigmaEpsilon.initialize(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) {
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
}
source = cu.replaceStrings(source, replacements);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
// Initialize the exceptions.
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions > 0) {
paramsDefines["HAS_EXCEPTIONS"] = "1";
exceptionAtoms.resize(numExceptions);
vector > atoms(numExceptions, vector(2));
exceptionParams.initialize(cu, numExceptions, "exceptionParams");
baseExceptionParams.initialize(cu, numExceptions, "baseExceptionParams");
vector baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0);
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
baseExceptionParams.upload(baseExceptionParamsVec);
map replacements;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
// Initialize parameter offsets.
vector > particleOffsetVec(force.getNumParticles());
vector > exceptionOffsetVec(force.getNumExceptions());
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
double charge, sigma, epsilon;
force.getParticleParameterOffset(i, param, particle, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
particleOffsetVec[particle].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
paramIndex = paramNames.size();
paramNames.push_back(param);
}
else
paramIndex = paramPos-paramNames.begin();
exceptionOffsetVec[exceptionIndex[exception]].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
paramValues.resize(paramNames.size(), 0.0);
particleParamOffsets.initialize(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
exceptionParamOffsets.initialize(cu, max(force.getNumExceptionParameterOffsets(), 1), "exceptionParamOffsets");
particleOffsetIndices.initialize(cu, cu.getPaddedNumAtoms()+1, "particleOffsetIndices");
exceptionOffsetIndices.initialize(cu, force.getNumExceptions()+1, "exceptionOffsetIndices");
vector particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) {
particleOffsetIndicesVec.push_back(p.size());
for (int j = 0; j < particleOffsetVec[i].size(); j++)
p.push_back(particleOffsetVec[i][j]);
}
while (particleOffsetIndicesVec.size() < particleOffsetIndices.getSize())
particleOffsetIndicesVec.push_back(p.size());
for (int i = 0; i < exceptionOffsetVec.size(); i++) {
exceptionOffsetIndicesVec.push_back(e.size());
for (int j = 0; j < exceptionOffsetVec[i].size(); j++)
e.push_back(exceptionOffsetVec[i][j]);
}
exceptionOffsetIndicesVec.push_back(e.size());
if (force.getNumParticleParameterOffsets() > 0) {
particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec);
}
if (force.getNumExceptionParameterOffsets() > 0) {
exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
}
globalParams.initialize(cu, max((int) paramValues.size(), 1), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "globalParams");
recomputeParams = true;
// Initialize the kernel for updating parameters.
CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters");
computeExclusionParamsKernel = cu.getKernel(module, "computeExclusionParameters");
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// Update particle and exception parameters.
bool paramChanged = false;
for (int i = 0; i < paramNames.size(); i++) {
double value = context.getParameter(paramNames[i]);
if (value != paramValues[i]) {
paramValues[i] = value;;
paramChanged = true;
}
}
if (paramChanged) {
recomputeParams = true;
globalParams.upload(paramValues, true);
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (recomputeParams || hasOffsets) {
bool computeSelfEnergy = (includeEnergy && includeReciprocal);
int numAtoms = cu.getPaddedNumAtoms();
vector paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
int numExceptions;
if (exceptionParams.isInitialized()) {
numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions);
paramsArgs.push_back(&baseExceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParams.getDevicePointer());
paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer());
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
}
cu.executeKernel(computeParamsKernel, ¶msArgs[0], cu.getPaddedNumAtoms());
if (exclusionParams.isInitialized()) {
int numExclusions = exclusionParams.getSize();
vector exclusionParamsArgs = {&cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&numExclusions, &exclusionAtoms.getDevicePointer(), &exclusionParams.getDevicePointer()};
cu.executeKernel(computeExclusionParamsKernel, &exclusionParamsArgs[0], numExclusions);
}
if (usePmeStream) {
cuEventRecord(paramsSyncEvent, cu.getCurrentStream());
cuStreamWaitEvent(pmeStream, paramsSyncEvent, 0);
}
if (hasOffsets)
energy = 0.0; // The Ewald self energy was computed in the kernel.
recomputeParams = false;
}
// Do reciprocal space calculations.
if (cosSinSums.isInitialized() && includeReciprocal) {
void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
}
if (pmeGrid1.isInitialized() && includeReciprocal) {
if (usePmeStream)
cu.setCurrentStream(pmeStream);
// Invert the periodic box vectors.
Vec3 boxVectors[3];
cu.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
double scale = 1.0/determinant;
double3 recipBoxVectors[3];
recipBoxVectors[0] = make_double3(boxVectors[1][1]*boxVectors[2][2]*scale, 0, 0);
recipBoxVectors[1] = make_double3(-boxVectors[1][0]*boxVectors[2][2]*scale, boxVectors[0][0]*boxVectors[2][2]*scale, 0);
recipBoxVectors[2] = make_double3((boxVectors[1][0]*boxVectors[2][1]-boxVectors[1][1]*boxVectors[2][0])*scale, -boxVectors[0][0]*boxVectors[2][1]*scale, boxVectors[0][0]*boxVectors[1][1]*scale);
float3 recipBoxVectorsFloat[3];
void* recipBoxVectorPointer[3];
if (cu.getUseDoublePrecision()) {
recipBoxVectorPointer[0] = &recipBoxVectors[0];
recipBoxVectorPointer[1] = &recipBoxVectors[1];
recipBoxVectorPointer[2] = &recipBoxVectors[2];
}
else {
recipBoxVectorsFloat[0] = make_float3((float) recipBoxVectors[0].x, 0, 0);
recipBoxVectorsFloat[1] = make_float3((float) recipBoxVectors[1].x, (float) recipBoxVectors[1].y, 0);
recipBoxVectorsFloat[2] = make_float3((float) recipBoxVectors[2].x, (float) recipBoxVectors[2].y, (float) recipBoxVectors[2].z);
recipBoxVectorPointer[0] = &recipBoxVectorsFloat[0];
recipBoxVectorPointer[1] = &recipBoxVectorsFloat[1];
recipBoxVectorPointer[2] = &recipBoxVectorsFloat[2];
}
// Execute the reciprocal space kernels.
if (hasCoulomb) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &pmeGrid2.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&charges.getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
void* finishSpreadArgs[] = {&pmeGrid2.getDevicePointer(), &pmeGrid1.getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(fftForward, (double*) pmeGrid1.getDevicePointer(), (double2*) pmeGrid2.getDevicePointer());
else
cufftExecR2C(fftForward, (float*) pmeGrid1.getDevicePointer(), (float2*) pmeGrid2.getDevicePointer());
}
else {
fft->execFFT(pmeGrid1, pmeGrid2, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&pmeGrid2.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
}
void* convolutionArgs[] = {&pmeGrid2.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(fftBackward, (double2*) pmeGrid2.getDevicePointer(), (double*) pmeGrid1.getDevicePointer());
else
cufftExecC2R(fftBackward, (float2*) pmeGrid2.getDevicePointer(), (float*) pmeGrid1.getDevicePointer());
}
else {
fft->execFFT(pmeGrid2, pmeGrid1, false);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &pmeGrid1.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&charges.getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (doLJPME && hasLJ) {
if (!hasCoulomb) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(pmeAtomGridIndex);
cu.clearBuffer(pmeEnergyBuffer);
}
cu.clearBuffer(pmeGrid2);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &pmeGrid2.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&sigmaEpsilon.getDevicePointer()};
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
void* finishSpreadArgs[] = {&pmeGrid2.getDevicePointer(), &pmeGrid1.getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(dispersionFftForward, (double*) pmeGrid1.getDevicePointer(), (double2*) pmeGrid2.getDevicePointer());
else
cufftExecR2C(dispersionFftForward, (float*) pmeGrid1.getDevicePointer(), (float2*) pmeGrid2.getDevicePointer());
}
else {
dispersionFft->execFFT(pmeGrid1, pmeGrid2, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&pmeGrid2.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
void* convolutionArgs[] = {&pmeGrid2.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(dispersionFftBackward, (double2*) pmeGrid2.getDevicePointer(), (double*) pmeGrid1.getDevicePointer());
else
cufftExecC2R(dispersionFftBackward, (float2*) pmeGrid2.getDevicePointer(), (float*) pmeGrid1.getDevicePointer());
}
else {
dispersionFft->execFFT(pmeGrid2, pmeGrid1, false);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &pmeGrid1.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&sigmaEpsilon.getDevicePointer()};
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (usePmeStream) {
cuEventRecord(pmeSyncEvent, pmeStream);
cu.restoreDefaultStream();
}
}
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
energy += dispersionCoefficient/(boxSize.x*boxSize.y*boxSize.z);
}
return energy;
}
void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
// Make sure the new parameters are acceptable.
cu.setAsCurrent();
if (force.getNumParticles() != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
if (!hasCoulomb || !hasLJ) {
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
if (!hasCoulomb && charge != 0.0)
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Coulomb interactions, because all charges were originally 0");
if (!hasLJ && epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0");
}
}
vector exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()])
exceptions.push_back(i);
else if (chargeProd != 0.0 || epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
// Record the per-particle parameters.
vector baseParticleParamVec(cu.getPaddedNumAtoms(), make_float4(0, 0, 0, 0));
const vector& order = cu.getAtomIndex();
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
baseParticleParamVec[i] = make_float4(charge, sigma, epsilon, 0);
}
baseParticleParams.upload(baseParticleParamVec);
// Record the exceptions.
if (numExceptions > 0) {
vector > atoms(numExceptions, vector(2));
vector baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0);
}
baseExceptionParams.upload(baseExceptionParamsVec);
}
// Compute other values.
ewaldSelfEnergy = 0.0;
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
if (cu.getContextIndex() == 0) {
for (int i = 0; i < force.getNumParticles(); i++) {
ewaldSelfEnergy -= baseParticleParamVec[i].x*baseParticleParamVec[i].x*ONE_4PI_EPS0*alpha/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += baseParticleParamVec[i].z*pow(baseParticleParamVec[i].y*dispersionAlpha, 6)/3.0;
}
}
}
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
recomputeParams = true;
}
void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cu.getPlatformData().useCpuPme)
cpuPme.getAs().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = this->alpha;
nx = gridSizeX;
ny = gridSizeY;
nz = gridSizeZ;
}
}
void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (!doLJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cu.getPlatformData().useCpuPme)
//cpuPme.getAs().getLJPMEParameters(alpha, nx, ny, nz);
throw OpenMMException("getPMEParametersInContext: CPUPME has not been implemented for LJPME yet.");
else {
alpha = this->dispersionAlpha;
nx = dispersionGridSizeX;
ny = dispersionGridSizeY;
nz = dispersionGridSizeZ;
}
}
class CudaCalcCustomNonbondedForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const CustomNonbondedForce& force) : force(force) {
if (force.getNumInteractionGroups() > 0) {
groupsForParticle.resize(force.getNumParticles());
for (int i = 0; i < force.getNumInteractionGroups(); i++) {
set set1, set2;
force.getInteractionGroupParameters(i, set1, set2);
for (int p : set1)
groupsForParticle[p].insert(2*i);
for (int p : set2)
groupsForParticle[p].insert(2*i+1);
}
}
}
bool areParticlesIdentical(int particle1, int particle2) {
vector params1;
vector params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
if (groupsForParticle.size() > 0 && groupsForParticle[particle1] != groupsForParticle[particle2])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomNonbondedForce& force;
vector > groupsForParticle;
};
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (forceCopy != NULL)
delete forceCopy;
}
void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
cu.setAsCurrent();
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = (force.getNumInteractionGroups() == 0 ? "custom"+cu.intToString(forceIndex)+"_" : "");
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0)
globals.initialize(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
vector > paramVector(numParticles);
vector > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
vector parameters;
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
exclusionList[i].push_back(i);
}
for (int i = 0; i < force.getNumExclusions(); i++) {
int particle1, particle2;
force.getExclusionParticles(i, particle1, particle2);
exclusionList[particle1].push_back(particle2);
exclusionList[particle2].push_back(particle1);
}
params->setParameterValues(paramVector);
// Record the tabulated functions.
map functions;
vector > functionDefinitions;
vector functionList;
vector tableTypes;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string arrayName = prefix+"table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions[i].initialize(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDevicePointer()));
if (width == 1)
tableTypes.push_back("float");
else
tableTypes.push_back("float"+cu.intToString(width));
}
// Record information for the expressions.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals.isInitialized())
globals.upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
map forceExpressions;
forceExpressions["real customEnergy = "] = energyExpression;
forceExpressions["tempForce -= "] = forceExpression;
// Create the kernels.
vector > variables;
ExpressionTreeNode rnode(new Operation::Variable("r"));
variables.push_back(make_pair(rnode, "r"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Square(), rnode), "r2"));
variables.push_back(make_pair(ExpressionTreeNode(new Operation::Reciprocal(), rnode), "invR"));
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables.push_back(makeVariable(name+"1", prefix+"params"+params->getParameterSuffix(i, "1")));
variables.push_back(makeVariable(name+"2", prefix+"params"+params->getParameterSuffix(i, "2")));
}
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
variables.push_back(makeVariable(name, prefix+value));
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string paramName = force.getEnergyParameterDerivativeName(i);
string derivVariable = cu.getNonbondedUtilities().addEnergyParameterDerivative(paramName);
Lepton::ParsedExpression derivExpression = energyExpression.differentiate(paramName).optimize();
forceExpressions[derivVariable+" += interactionScale*switchValue*"] = derivExpression;
}
stringstream compute;
compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionList, functionDefinitions, prefix+"temp");
map replacements;
replacements["COMPUTE_FORCE"] = compute.str();
replacements["USE_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
if (force.getUseSwitchingFunction()) {
// Compute the switching coefficients.
replacements["SWITCH_CUTOFF"] = cu.doubleToString(force.getSwitchingDistance());
replacements["SWITCH_C3"] = cu.doubleToString(10/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 3.0));
replacements["SWITCH_C4"] = cu.doubleToString(15/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 4.0));
replacements["SWITCH_C5"] = cu.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements);
if (force.getNumInteractionGroups() > 0)
initInteractionGroups(force, source, tableTypes);
else {
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
}
if (globals.isInitialized()) {
globals.upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals.getDevicePointer()));
}
}
info = new ForceInfo(force);
cu.addForce(info);
// Record information for the long range correction.
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic && force.getUseLongRangeCorrection() && cu.getContextIndex() == 0) {
forceCopy = new CustomNonbondedForce(force);
hasInitializedLongRangeCorrection = false;
}
else {
longRangeCoefficient = 0.0;
hasInitializedLongRangeCorrection = true;
}
}
void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbondedForce& force, const string& interactionSource, const vector& tableTypes) {
// Process groups to form tiles.
vector > atomLists;
vector > tiles;
vector tileGroup;
vector > duplicateAtomsForGroup;
for (int group = 0; group < force.getNumInteractionGroups(); group++) {
// Get the list of atoms in this group and sort them.
set set1, set2;
force.getInteractionGroupParameters(group, set1, set2);
vector atoms1, atoms2;
atoms1.insert(atoms1.begin(), set1.begin(), set1.end());
atoms2.insert(atoms2.begin(), set2.begin(), set2.end());
sort(atoms1.begin(), atoms1.end());
sort(atoms2.begin(), atoms2.end());
duplicateAtomsForGroup.push_back(vector());
set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(),
inserter(duplicateAtomsForGroup[group], duplicateAtomsForGroup[group].begin()));
sort(duplicateAtomsForGroup[group].begin(), duplicateAtomsForGroup[group].end());
// Find how many tiles we will create for this group.
int tileWidth = min(min(32, (int) atoms1.size()), (int) atoms2.size());
if (tileWidth == 0)
continue;
int numBlocks1 = (atoms1.size()+tileWidth-1)/tileWidth;
int numBlocks2 = (atoms2.size()+tileWidth-1)/tileWidth;
// Add the tiles.
int firstTile = tiles.size();
for (int i = 0; i < numBlocks1; i++)
for (int j = 0; j < numBlocks2; j++) {
tiles.push_back(make_pair(atomLists.size()+i, atomLists.size()+numBlocks1+j));
tileGroup.push_back(group);
}
// Add the atom lists.
for (int i = 0; i < numBlocks1; i++) {
vector atoms;
int first = i*tileWidth;
int last = min((i+1)*tileWidth, (int) atoms1.size());
for (int j = first; j < last; j++)
atoms.push_back(atoms1[j]);
atomLists.push_back(atoms);
}
for (int i = 0; i < numBlocks2; i++) {
vector atoms;
int first = i*tileWidth;
int last = min((i+1)*tileWidth, (int) atoms2.size());
for (int j = first; j < last; j++)
atoms.push_back(atoms2[j]);
atomLists.push_back(atoms);
}
}
// Build a lookup table for quickly identifying excluded interactions.
vector > exclusions(force.getNumParticles());
for (int i = 0; i < force.getNumExclusions(); i++) {
int p1, p2;
force.getExclusionParticles(i, p1, p2);
exclusions[p1].insert(p2);
exclusions[p2].insert(p1);
}
// Build the exclusion flags for each tile. While we're at it, filter out tiles
// where all interactions are excluded, and sort the tiles by size.
vector > exclusionFlags(tiles.size());
vector > tileOrder;
for (int tile = 0; tile < tiles.size(); tile++) {
bool swapped = false;
if (atomLists[tiles[tile].first].size() < atomLists[tiles[tile].second].size()) {
// For efficiency, we want the first axis to be the larger one.
int swap = tiles[tile].first;
tiles[tile].first = tiles[tile].second;
tiles[tile].second = swap;
swapped = true;
}
vector& atoms1 = atomLists[tiles[tile].first];
vector& atoms2 = atomLists[tiles[tile].second];
vector& duplicateAtoms = duplicateAtomsForGroup[tileGroup[tile]];
vector& flags = exclusionFlags[tile];
flags.resize(atoms1.size(), (int) (1LL< a2) == swapped && a1IsDuplicate && binary_search(duplicateAtoms.begin(), duplicateAtoms.end(), a2))
isExcluded = true; // Both atoms are in both sets, so skip duplicate interactions.
if (isExcluded) {
flags[i] &= -1-(1< tileSetStart;
tileSetStart.push_back(0);
int tileSetSize = 0;
for (int i = 0; i < tileOrder.size(); i++) {
int tile = tileOrder[i].second;
int size = atomLists[tiles[tile].first].size();
if (tileSetSize+size > 32) {
tileSetStart.push_back(i);
tileSetSize = 0;
}
tileSetSize += size;
}
tileSetStart.push_back(tileOrder.size());
// Build the data structures.
int numTileSets = tileSetStart.size()-1;
vector groupData;
for (int tileSet = 0; tileSet < numTileSets; tileSet++) {
int indexInTileSet = 0;
for (int i = tileSetStart[tileSet]; i < tileSetStart[tileSet+1]; i++) {
int tile = tileOrder[i].second;
vector& atoms1 = atomLists[tiles[tile].first];
vector& atoms2 = atomLists[tiles[tile].second];
int range = indexInTileSet + ((indexInTileSet+atoms1.size())<<16);
int allFlags = (1< 0 ? exclusionFlags[tile][j] : allFlags);
groupData.push_back(make_int4(a1, a2, range, flags<(cu, groupData.size(), "interactionGroupData");
interactionGroupData.upload(groupData);
numGroupTiles.initialize(cu, 1, "numGroupTiles");
// Allocate space for a neighbor list, if necessary.
if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && groupData.size() > cu.getNumThreadBlocks()) {
filteredGroupData.initialize(cu, groupData.size(), "filteredGroupData");
interactionGroupData.copyTo(filteredGroupData);
int numTiles = groupData.size()/32;
numGroupTiles.upload(&numTiles);
}
// Create the kernel.
hasParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
map replacements;
replacements["COMPUTE_INTERACTION"] = interactionSource;
const string suffixes[] = {"x", "y", "z", "w"};
stringstream localData;
int localDataSize = 0;
vector& buffers = params->getBuffers();
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getNumComponents() == 1)
localData< 0)
load2<<", ";
load2<<"localData[localIndex].params"<<(i+1)<<"_"<& allParamDerivNames = cu.getEnergyParamDerivNames();
int numDerivs = allParamDerivNames.size();
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string paramName = force.getEnergyParameterDerivativeName(i);
string derivVariable = cu.getNonbondedUtilities().addEnergyParameterDerivative(paramName);
initDerivs<<"mixed "< defines;
if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
defines["USE_CUTOFF"] = "1";
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
defines["USE_PERIODIC"] = "1";
int localMemorySize = max(32, cu.getNonbondedUtilities().getForceThreadBlockSize());
defines["LOCAL_MEMORY_SIZE"] = cu.intToString(localMemorySize);
defines["WARPS_IN_BLOCK"] = cu.intToString(localMemorySize/32);
double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
double paddedCutoff = cu.getNonbondedUtilities().padCutoff(cutoff);
defines["PADDED_CUTOFF_SQUARED"] = cu.doubleToString(paddedCutoff*paddedCutoff);
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["TILE_SIZE"] = "32";
defines["NUM_TILES"] = cu.intToString(numTileSets);
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*numTileSets/numContexts;
int endIndex = (cu.getContextIndex()+1)*numTileSets/numContexts;
defines["FIRST_TILE"] = cu.intToString(startIndex);
defines["LAST_TILE"] = cu.intToString(endIndex);
if ((localDataSize/4)%2 == 0 && !cu.getUseDoublePrecision())
defines["PARAMETER_SIZE_IS_EVEN"] = "1";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customNonbondedGroups, replacements), defines);
interactionGroupKernel = cu.getKernel(module, "computeInteractionGroups");
prepareNeighborListKernel = cu.getKernel(module, "prepareToBuildNeighborList");
buildNeighborListKernel = cu.getKernel(module, "buildNeighborList");
numGroupThreadBlocks = cu.getNonbondedUtilities().getNumForceThreadBlocks();
}
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
useNeighborList = (filteredGroupData.isInitialized() && cu.getNonbondedUtilities().getUseCutoff());
if (useNeighborList && cu.getContextIndex() > 0) {
// When using a neighbor list, run the whole calculation on a single device.
return 0.0;
}
if (globals.isInitialized()) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed) {
globals.upload(globalParamValues);
if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
}
}
}
if (!hasInitializedLongRangeCorrection) {
CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
}
if (interactionGroupData.isInitialized()) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
interactionGroupArgs.push_back(&cu.getForce().getDevicePointer());
interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer());
interactionGroupArgs.push_back(&(useNeighborList ? filteredGroupData : interactionGroupData).getDevicePointer());
interactionGroupArgs.push_back(&numGroupTiles.getDevicePointer());
interactionGroupArgs.push_back(&useNeighborList);
interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecYPointer());
interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer());
for (auto& buffer : params->getBuffers())
interactionGroupArgs.push_back(&buffer.getMemory());
for (auto& function : tabulatedFunctions)
interactionGroupArgs.push_back(&function.getDevicePointer());
if (globals.isInitialized())
interactionGroupArgs.push_back(&globals.getDevicePointer());
if (hasParamDerivs)
interactionGroupArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
if (useNeighborList) {
// Initialize kernels for building the interaction group neighbor list.
prepareNeighborListArgs.push_back(&cu.getNonbondedUtilities().getRebuildNeighborList().getDevicePointer());
prepareNeighborListArgs.push_back(&numGroupTiles.getDevicePointer());
buildNeighborListArgs.push_back(&cu.getNonbondedUtilities().getRebuildNeighborList().getDevicePointer());
buildNeighborListArgs.push_back(&numGroupTiles.getDevicePointer());
buildNeighborListArgs.push_back(&cu.getPosq().getDevicePointer());
buildNeighborListArgs.push_back(&interactionGroupData.getDevicePointer());
buildNeighborListArgs.push_back(&filteredGroupData.getDevicePointer());
buildNeighborListArgs.push_back(cu.getPeriodicBoxSizePointer());
buildNeighborListArgs.push_back(cu.getInvPeriodicBoxSizePointer());
buildNeighborListArgs.push_back(cu.getPeriodicBoxVecXPointer());
buildNeighborListArgs.push_back(cu.getPeriodicBoxVecYPointer());
buildNeighborListArgs.push_back(cu.getPeriodicBoxVecZPointer());
}
}
int forceThreadBlockSize = cu.getNonbondedUtilities().getForceThreadBlockSize();
if (useNeighborList) {
// Rebuild the neighbor list, if necessary.
cu.executeKernel(prepareNeighborListKernel, &prepareNeighborListArgs[0], 1, 1);
cu.executeKernel(buildNeighborListKernel, &buildNeighborListArgs[0], numGroupThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
cu.executeKernel(interactionGroupKernel, &interactionGroupArgs[0], numGroupThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
double4 boxSize = cu.getPeriodicBoxSize();
double volume = boxSize.x*boxSize.y*boxSize.z;
map& derivs = cu.getEnergyParamDerivWorkspace();
for (int i = 0; i < longRangeCoefficientDerivs.size(); i++)
derivs[forceCopy->getEnergyParameterDerivativeName(i)] += longRangeCoefficientDerivs[i]/volume;
return longRangeCoefficient/volume;
}
void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
cu.setAsCurrent();
int numParticles = force.getNumParticles();
if (numParticles != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
// Record the per-particle parameters.
vector > paramVector(numParticles);
vector parameters;
for (int i = 0; i < numParticles; i++) {
force.getParticleParameters(i, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// If necessary, recompute the long range correction.
if (forceCopy != NULL) {
CustomNonbondedForceImpl::calcLongRangeCorrection(force, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
hasInitializedLongRangeCorrection = true;
*forceCopy = force;
}
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcGBSAOBCForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const GBSAOBCForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, radius1, radius2, scale1, scale2;
force.getParticleParameters(particle1, charge1, radius1, scale1);
force.getParticleParameters(particle2, charge2, radius2, scale2);
return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2);
}
private:
const GBSAOBCForce& force;
};
void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "obc"+cu.intToString(forceIndex)+"_";
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
params.initialize(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
charges.initialize(cu, cu.getPaddedNumAtoms(), elementSize, "gbsaObcCharges");
bornRadii.initialize(cu, cu.getPaddedNumAtoms(), elementSize, "bornRadii");
obcChain.initialize(cu, cu.getPaddedNumAtoms(), elementSize, "obcChain");
bornSum.initialize(cu, cu.getPaddedNumAtoms(), "bornSum");
bornForce.initialize(cu, cu.getPaddedNumAtoms(), "bornForce");
cu.addAutoclearBuffer(bornSum);
cu.addAutoclearBuffer(bornForce);
CudaArray& posq = cu.getPosq();
vector chargeVec(cu.getPaddedNumAtoms());
vector paramsVector(cu.getPaddedNumAtoms(), make_float2(1, 1));
const double dielectricOffset = 0.009;
for (int i = 0; i < force.getNumParticles(); i++) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
radius -= dielectricOffset;
chargeVec[i] = charge;
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
}
charges.upload(chargeVec, true);
params.upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
cutoff = force.getCutoffDistance();
string source = CudaKernelSources::gbsaObc2;
map replacements;
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
replacements["OBC_PARAMS1"] = prefix+"obcParams1";
replacements["OBC_PARAMS2"] = prefix+"obcParams2";
replacements["BORN_FORCE1"] = prefix+"bornForce1";
replacements["BORN_FORCE2"] = prefix+"bornForce2";
source = cu.replaceStrings(source, replacements);
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "float", 1, sizeof(float), charges.getDevicePointer()));;
nb.addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"obcParams", "float", 2, sizeof(float2), params.getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"bornForce", "long long", 1, sizeof(long long), bornForce.getDevicePointer()));
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
if (!hasCreatedKernels) {
// These Kernels cannot be created in initialize(), because the CudaNonbondedUtilities has not been initialized yet then.
hasCreatedKernels = true;
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
map defines;
if (nb.getUseCutoff())
defines["USE_CUTOFF"] = "1";
if (nb.getUsePeriodic())
defines["USE_PERIODIC"] = "1";
if (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision())
defines["ENABLE_SHUFFLE"] = "1";
defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = cu.doubleToString(cutoff);
defines["PREFACTOR"] = cu.doubleToString(prefactor);
defines["SURFACE_AREA_FACTOR"] = cu.doubleToString(surfaceAreaFactor);
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["FORCE_WORK_GROUP_SIZE"] = cu.intToString(nb.getForceThreadBlockSize());
defines["TILE_SIZE"] = cu.intToString(CudaContext::TileSize);
int numExclusionTiles = nb.getExclusionTiles().getSize();
defines["NUM_TILES_WITH_EXCLUSIONS"] = cu.intToString(numExclusionTiles);
int numContexts = cu.getPlatformData().contexts.size();
int startExclusionIndex = cu.getContextIndex()*numExclusionTiles/numContexts;
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
defines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
defines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
map replacements;
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines);
computeBornSumKernel = cu.getKernel(module, "computeBornSum");
computeSumArgs.push_back(&bornSum.getDevicePointer());
computeSumArgs.push_back(&cu.getPosq().getDevicePointer());
computeSumArgs.push_back(&charges.getDevicePointer());
computeSumArgs.push_back(¶ms.getDevicePointer());
if (nb.getUseCutoff()) {
computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
computeSumArgs.push_back(cu.getPeriodicBoxSizePointer());
computeSumArgs.push_back(cu.getInvPeriodicBoxSizePointer());
computeSumArgs.push_back(cu.getPeriodicBoxVecXPointer());
computeSumArgs.push_back(cu.getPeriodicBoxVecYPointer());
computeSumArgs.push_back(cu.getPeriodicBoxVecZPointer());
computeSumArgs.push_back(&maxTiles);
computeSumArgs.push_back(&nb.getBlockCenters().getDevicePointer());
computeSumArgs.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractingAtoms().getDevicePointer());
}
else
computeSumArgs.push_back(&maxTiles);
computeSumArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
force1Kernel = cu.getKernel(module, "computeGBSAForce1");
force1Args.push_back(&cu.getForce().getDevicePointer());
force1Args.push_back(&bornForce.getDevicePointer());
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&charges.getDevicePointer());
force1Args.push_back(&bornRadii.getDevicePointer());
force1Args.push_back(NULL);
if (nb.getUseCutoff()) {
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
force1Args.push_back(&nb.getInteractionCount().getDevicePointer());
force1Args.push_back(cu.getPeriodicBoxSizePointer());
force1Args.push_back(cu.getInvPeriodicBoxSizePointer());
force1Args.push_back(cu.getPeriodicBoxVecXPointer());
force1Args.push_back(cu.getPeriodicBoxVecYPointer());
force1Args.push_back(cu.getPeriodicBoxVecZPointer());
force1Args.push_back(&maxTiles);
force1Args.push_back(&nb.getBlockCenters().getDevicePointer());
force1Args.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
force1Args.push_back(&nb.getInteractingAtoms().getDevicePointer());
}
else
force1Args.push_back(&maxTiles);
force1Args.push_back(&nb.getExclusionTiles().getDevicePointer());
reduceBornSumKernel = cu.getKernel(module, "reduceBornSum");
reduceBornForceKernel = cu.getKernel(module, "reduceBornForce");
}
force1Args[6] = &includeEnergy;
if (nb.getUseCutoff()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
computeSumArgs[4] = &nb.getInteractingTiles().getDevicePointer();
force1Args[7] = &nb.getInteractingTiles().getDevicePointer();
computeSumArgs[14] = &nb.getInteractingAtoms().getDevicePointer();
force1Args[17] = &nb.getInteractingAtoms().getDevicePointer();
}
}
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
float alpha = 1.0f, beta = 0.8f, gamma = 4.85f;
void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum.getDevicePointer(), ¶ms.getDevicePointer(),
&bornRadii.getDevicePointer(), &obcChain.getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms());
cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
void* reduceForceArgs[] = {&bornForce.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), ¶ms.getDevicePointer(),
&bornRadii.getDevicePointer(), &obcChain.getDevicePointer()};
cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms());
return 0.0;
}
void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force) {
// Make sure the new parameters are acceptable.
cu.setAsCurrent();
int numParticles = force.getNumParticles();
if (numParticles != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
// Record the per-particle parameters.
vector chargeVector(cu.getPaddedNumAtoms(), 0.0);
vector paramsVector(cu.getPaddedNumAtoms());
const double dielectricOffset = 0.009;
for (int i = 0; i < numParticles; i++) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
chargeVector[i] = charge;
radius -= dielectricOffset;
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
}
for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
paramsVector[i] = make_float2(1, 1);
charges.upload(chargeVector, true);
params.upload(paramsVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCalcCustomGBForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(const CustomGBForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector params1;
vector params2;
force.getParticleParameters(particle1, params1);
force.getParticleParameters(particle2, params2);
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
return false;
return true;
}
int getNumParticleGroups() {
return force.getNumExclusions();
}
void getParticlesInGroup(int index, vector& particles) {
int particle1, particle2;
force.getExclusionParticles(index, particle1, particle2);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
bool areGroupsIdentical(int group1, int group2) {
return true;
}
private:
const CustomGBForce& force;
};
CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (computedValues != NULL)
delete computedValues;
if (energyDerivs != NULL)
delete energyDerivs;
if (energyDerivChain != NULL)
delete energyDerivChain;
for (auto d : dValuedParam)
delete d;
}
void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("CustomGBForce does not support using multiple CUDA devices");
cutoff = force.getCutoffDistance();
bool useExclusionsForValue = false;
numComputedValues = force.getNumComputedValues();
vector computedValueNames(force.getNumComputedValues());
vector computedValueExpressions(force.getNumComputedValues());
if (force.getNumComputedValues() > 0) {
CustomGBForce::ComputationType type;
force.getComputedValueParameters(0, computedValueNames[0], computedValueExpressions[0], type);
if (type == CustomGBForce::SingleParticle)
throw OpenMMException("CudaPlatform requires that the first computed value for a CustomGBForce be of type ParticlePair or ParticlePairNoExclusions.");
useExclusionsForValue = (type == CustomGBForce::ParticlePair);
for (int i = 1; i < force.getNumComputedValues(); i++) {
force.getComputedValueParameters(i, computedValueNames[i], computedValueExpressions[i], type);
if (type != CustomGBForce::SingleParticle)
throw OpenMMException("CudaPlatform requires that a CustomGBForce only have one computed value of type ParticlePair or ParticlePairNoExclusions.");
}
}
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
string prefix = "custom"+cu.intToString(forceIndex)+"_";
// Record parameters and exclusions.
int numParticles = force.getNumParticles();
int paddedNumParticles = cu.getPaddedNumAtoms();
int numParams = force.getNumPerParticleParameters();
params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cu.getUseDoublePrecision());
if (force.getNumGlobalParameters() > 0)
globals.initialize(cu, force.getNumGlobalParameters(), "customGBGlobals");
vector > paramVector(paddedNumParticles, vector(numParams, 0));
vector > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
vector parameters;
force.getParticleParameters(i, parameters);
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
exclusionList[i].push_back(i);
}
for (int i = 0; i < force.getNumExclusions(); i++) {
int particle1, particle2;
force.getExclusionParticles(i, particle1, particle2);
exclusionList[particle1].push_back(particle2);
exclusionList[particle2].push_back(particle1);
}
params->setParameterValues(paramVector);
// Record the tabulated functions.
map functions;
vector > functionDefinitions;
vector functionList;
stringstream tableArgs;
tabulatedFunctions.resize(force.getNumTabulatedFunctions());
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string arrayName = prefix+"table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions[i].initialize(cu, f.size(), "TabulatedFunction");
tabulatedFunctions[i].upload(f);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDevicePointer()));
tableArgs << ", const float";
if (width > 1)
tableArgs << width;
tableArgs << "* __restrict__ " << arrayName;
}
// Record the global parameters.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
if (globals.isInitialized())
globals.upload(globalParamValues);
// Record derivatives of expressions needed for the chain rule terms.
vector