Unverified Commit 98d81730 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Converted more code to common platform (#3073)

* Converted more code to common platform

* Converted more code to common platform
parent 72c70cfe
...@@ -35,10 +35,68 @@ ...@@ -35,10 +35,68 @@
#include "openmm/internal/CompiledExpressionSet.h" #include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h" #include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h" #include "lepton/CompiledExpression.h"
#include "lepton/ExpressionProgram.h"
namespace OpenMM { namespace OpenMM {
/**
* This kernel modifies the positions of particles to enforce distance constraints.
*/
class CommonApplyConstraintsKernel : public ApplyConstraintsKernel {
public:
CommonApplyConstraintsKernel(std::string name, const Platform& platform, ComputeContext& cc) : ApplyConstraintsKernel(name, platform),
cc(cc), hasInitializedKernel(false) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Update particle positions to enforce constraints.
*
* @param context the context in which to execute this kernel
* @param tol the distance tolerance within which constraints must be satisfied.
*/
void apply(ContextImpl& context, double tol);
/**
* Update particle velocities to enforce constraints.
*
* @param context the context in which to execute this kernel
* @param tol the velocity tolerance within which constraints must be satisfied.
*/
void applyToVelocities(ContextImpl& context, double tol);
private:
ComputeContext& cc;
bool hasInitializedKernel;
ComputeKernel applyDeltasKernel;
};
/**
* This kernel recomputes the positions of virtual sites.
*/
class CommonVirtualSitesKernel : public VirtualSitesKernel {
public:
CommonVirtualSitesKernel(std::string name, const Platform& platform, ComputeContext& cc) : VirtualSitesKernel(name, platform), cc(cc) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Compute the virtual site locations.
*
* @param context the context in which to execute this kernel
*/
void computePositions(ContextImpl& context);
private:
ComputeContext& cc;
};
/** /**
* This kernel is invoked by HarmonicBondForce to calculate the forces acting on the system and the energy of the system. * This kernel is invoked by HarmonicBondForce to calculate the forces acting on the system and the energy of the system.
*/ */
...@@ -832,6 +890,65 @@ private: ...@@ -832,6 +890,65 @@ private:
ComputeEvent event; ComputeEvent event;
}; };
/**
* This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
*/
class CommonCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
public:
CommonCalcCustomCVForceKernel(std::string name, const Platform& platform, ComputeContext& cc) : CalcCustomCVForceKernel(name, platform),
cc(cc), hasInitializedListeners(false) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomCVForce this kernel will be used for
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);
/**
* Copy state information to the inner context.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void copyState(ContextImpl& context, ContextImpl& innerContext);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomCVForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomCVForce& force);
/**
* Get the ComputeContext corresponding to the inner Context.
*/
virtual ComputeContext& getInnerComputeContext(ContextImpl& innerContext) = 0;
private:
class ForceInfo;
class ReorderListener;
ComputeContext& cc;
bool hasInitializedListeners;
Lepton::ExpressionProgram energyExpression;
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<ComputeArray> cvForces;
ComputeArray invAtomOrder;
ComputeArray innerInvAtomOrder;
ComputeKernel copyStateKernel, copyForcesKernel, addForcesKernel;
};
/** /**
* This kernel is invoked by VerletIntegrator to take one time step. * This kernel is invoked by VerletIntegrator to take one time step.
*/ */
...@@ -1399,6 +1516,52 @@ private: ...@@ -1399,6 +1516,52 @@ private:
ComputeKernel kernel; ComputeKernel kernel;
}; };
/**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/
class CommonApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
public:
CommonApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, ComputeContext& cc) : ApplyMonteCarloBarostatKernel(name, platform), cc(cc),
hasInitializedKernels(false) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param barostat the MonteCarloBarostat this kernel will be used for
*/
void initialize(const System& system, const Force& barostat);
/**
* Attempt a Monte Carlo step, scaling particle positions (or cluster centers) by a specified value.
* This version scales the x, y, and z positions independently.
* This is called BEFORE the periodic box size is modified. It should begin by translating each particle
* or cluster into the first periodic box, so that coordinates will still be correct after the box size
* is changed.
*
* @param context the context in which to execute this kernel
* @param scaleX the scale factor by which to multiply particle x-coordinate
* @param scaleY the scale factor by which to multiply particle y-coordinate
* @param scaleZ the scale factor by which to multiply particle z-coordinate
*/
void scaleCoordinates(ContextImpl& context, double scaleX, double scaleY, double scaleZ);
/**
* Reject the most recent Monte Carlo step, restoring the particle positions to where they were before
* scaleCoordinates() was last called.
*
* @param context the context in which to execute this kernel
*/
void restoreCoordinates(ContextImpl& context);
private:
ComputeContext& cc;
bool hasInitializedKernels;
int numMolecules;
ComputeArray savedPositions, savedFloatForces, savedLongForces;
ComputeArray moleculeAtoms;
ComputeArray moleculeStartIndex;
ComputeKernel kernel;
std::vector<int> lastAtomOrder;
};
} // namespace OpenMM } // namespace OpenMM
#endif /*OPENMM_COMMONKERNELS_H_*/ #endif /*OPENMM_COMMONKERNELS_H_*/
...@@ -300,10 +300,14 @@ public: ...@@ -300,10 +300,14 @@ public:
virtual ArrayInterface& getVelm() = 0; virtual ArrayInterface& getVelm() = 0;
/** /**
* On devices that do not support 64 bit atomics, this returns an array containing buffers of type real4 in which * On devices that do not support 64 bit atomics, this returns an array containing buffers of type real4 in which
* forces can be accumulated. Do not call this if getSupports64BitGlobalAtomics() returns true. The returned value * forces can be accumulated. On platforms that do not use floating point force buffers, this will throw an exception.
* in that case is undefined, and it may throw an exception.
*/ */
virtual ArrayInterface& getForceBuffers() = 0; virtual ArrayInterface& getForceBuffers() = 0;
/**
* Get the array which contains a contribution to each force represented as a real4. On platforms that do not use
* floating point force buffers, this will throw an exception.
*/
virtual ArrayInterface& getFloatForceBuffer() = 0;
/** /**
* Get the array which contains a contribution to each force represented as 64 bit fixed point. * Get the array which contains a contribution to each force represented as 64 bit fixed point.
*/ */
......
...@@ -98,6 +98,49 @@ static pair<ExpressionTreeNode, string> makeVariable(const string& name, const s ...@@ -98,6 +98,49 @@ static pair<ExpressionTreeNode, string> makeVariable(const string& name, const s
return make_pair(ExpressionTreeNode(new Operation::Variable(name)), value); return make_pair(ExpressionTreeNode(new Operation::Variable(name)), value);
} }
static void replaceFunctionsInExpression(map<string, CustomFunction*>& functions, ExpressionProgram& expression) {
for (int i = 0; i < expression.getNumOperations(); i++) {
if (expression.getOperation(i).getId() == Operation::CUSTOM) {
const Operation::Custom& op = dynamic_cast<const Operation::Custom&>(expression.getOperation(i));
expression.setOperation(i, new Operation::Custom(op.getName(), functions[op.getName()]->clone(), op.getDerivOrder()));
}
}
}
void CommonApplyConstraintsKernel::initialize(const System& system) {
}
void CommonApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
cc.setAsCurrent();
if (!hasInitializedKernel) {
hasInitializedKernel = true;
map<string, string> defines;
ComputeProgram program = cc.compileProgram(CommonKernelSources::constraints, defines);
applyDeltasKernel = program->createKernel("applyPositionDeltas");
applyDeltasKernel->addArg(cc.getNumAtoms());
applyDeltasKernel->addArg(cc.getPosq());
applyDeltasKernel->addArg(cc.getIntegrationUtilities().getPosDelta());
if (cc.getUseMixedPrecision())
applyDeltasKernel->addArg(cc.getPosqCorrection());
}
IntegrationUtilities& integration = cc.getIntegrationUtilities();
cc.clearBuffer(integration.getPosDelta());
integration.applyConstraints(tol);
applyDeltasKernel->execute(cc.getNumAtoms());
integration.computeVirtualSites();
}
void CommonApplyConstraintsKernel::applyToVelocities(ContextImpl& context, double tol) {
cc.getIntegrationUtilities().applyVelocityConstraints(tol);
}
void CommonVirtualSitesKernel::initialize(const System& system) {
}
void CommonVirtualSitesKernel::computePositions(ContextImpl& context) {
cc.getIntegrationUtilities().computeVirtualSites();
}
class CommonCalcHarmonicBondForceKernel::ForceInfo : public ComputeForceInfo { class CommonCalcHarmonicBondForceKernel::ForceInfo : public ComputeForceInfo {
public: public:
ForceInfo(const HarmonicBondForce& force) : force(force) { ForceInfo(const HarmonicBondForce& force) : force(force) {
...@@ -4993,6 +5036,225 @@ void CommonCalcGayBerneForceKernel::sortAtoms() { ...@@ -4993,6 +5036,225 @@ void CommonCalcGayBerneForceKernel::sortAtoms() {
exclusionStartIndex.upload(startIndexVec); exclusionStartIndex.upload(startIndexVec);
} }
class CommonCalcCustomCVForceKernel::ForceInfo : public ComputeForceInfo {
public:
ForceInfo(ComputeForceInfo& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
return force.areParticlesIdentical(particle1, particle2);
}
int getNumParticleGroups() {
return force.getNumParticleGroups();
}
void getParticlesInGroup(int index, std::vector<int>& particles) {
force.getParticlesInGroup(index, particles);
}
bool areGroupsIdentical(int group1, int group2) {
return force.areGroupsIdentical(group1, group2);
}
private:
ComputeForceInfo& force;
};
class CommonCalcCustomCVForceKernel::ReorderListener : public ComputeContext::ReorderListener {
public:
ReorderListener(ComputeContext& cc, ArrayInterface& invAtomOrder) : cc(cc), invAtomOrder(invAtomOrder) {
}
void execute() {
vector<int> invOrder(cc.getPaddedNumAtoms());
const vector<int>& order = cc.getAtomIndex();
for (int i = 0; i < order.size(); i++)
invOrder[order[i]] = i;
invAtomOrder.upload(invOrder);
}
private:
ComputeContext& cc;
ArrayInterface& invAtomOrder;
};
void CommonCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
int numCVs = force.getNumCollectiveVariables();
for (int i = 0; i < force.getNumGlobalParameters(); i++)
globalParameterNames.push_back(force.getGlobalParameterName(i));
for (int i = 0; i < numCVs; i++)
variableNames.push_back(force.getCollectiveVariableName(i));
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string name = force.getEnergyParameterDerivativeName(i);
paramDerivNames.push_back(name);
cc.addEnergyParameterDerivative(name);
}
// Create custom functions for the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Create the expressions.
Lepton::ParsedExpression energyExpr = Lepton::Parser::parse(force.getEnergyFunction(), functions);
energyExpression = energyExpr.createProgram();
variableDerivExpressions.clear();
for (auto& name : variableNames)
variableDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
paramDerivExpressions.clear();
for (auto& name : paramDerivNames)
paramDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
// Copy parameter derivatives from the inner context.
ComputeContext& cc2 = getInnerComputeContext(innerContext);
for (auto& param : cc2.getEnergyParamDerivNames())
cc.addEnergyParameterDerivative(param);
// Create arrays for storing information.
cvForces.resize(numCVs);
for (int i = 0; i < numCVs; i++)
cvForces[i].initialize<long long>(cc, 3*cc.getPaddedNumAtoms(), "cvForce");
invAtomOrder.initialize<int>(cc, cc.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder.initialize<int>(cc, cc.getPaddedNumAtoms(), "innerInvAtomOrder");
// Create the kernels.
stringstream args, add;
for (int i = 0; i < numCVs; i++) {
args << ", GLOBAL mm_long * RESTRICT force" << i << ", real dEdV" << i;
add << "forces[i] += (mm_long) (force" << i << "[i]*dEdV" << i << ");\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["ADD_FORCES"] = add.str();
ComputeProgram program = cc.compileProgram(cc.replaceStrings(CommonKernelSources::customCVForce, replacements));
copyStateKernel = program->createKernel("copyState");
copyStateKernel->addArg(cc.getPosq());
copyStateKernel->addArg(cc2.getPosq());
if (cc.getUseMixedPrecision()) {
copyStateKernel->addArg(cc.getPosqCorrection());
copyStateKernel->addArg(cc2.getPosqCorrection());
}
copyStateKernel->addArg(cc.getVelm());
copyStateKernel->addArg(cc2.getVelm());
copyStateKernel->addArg(cc.getAtomIndexArray());
copyStateKernel->addArg(innerInvAtomOrder);
copyStateKernel->addArg(cc.getNumAtoms());
copyForcesKernel = program->createKernel("copyForces");
copyForcesKernel->addArg();
copyForcesKernel->addArg(invAtomOrder);
copyForcesKernel->addArg(cc2.getLongForceBuffer());
copyForcesKernel->addArg(cc2.getAtomIndexArray());
copyForcesKernel->addArg(cc.getNumAtoms());
copyForcesKernel->addArg(cc.getPaddedNumAtoms());
addForcesKernel = program->createKernel("addForces");
addForcesKernel->addArg(cc.getLongForceBuffer());
addForcesKernel->addArg(cc.getLongForceBuffer().getSize());
for (int i = 0; i < numCVs; i++) {
addForcesKernel->addArg();
addForcesKernel->addArg();
}
// This context needs to respect all forces in the inner context when reordering atoms.
for (auto* info : cc2.getForceInfos())
cc.addForce(new ForceInfo(*info));
}
double CommonCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
copyState(context, innerContext);
int numCVs = variableNames.size();
int numAtoms = cc.getNumAtoms();
int paddedNumAtoms = cc.getPaddedNumAtoms();
vector<double> cvValues;
vector<map<string, double> > cvDerivs(numCVs);
for (int i = 0; i < numCVs; i++) {
cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
copyForcesKernel->setArg(0, cvForces[i]);
copyForcesKernel->execute(numAtoms);
innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
}
// Compute the energy and forces.
map<string, double> variables;
for (auto& name : globalParameterNames)
variables[name] = context.getParameter(name);
for (int i = 0; i < numCVs; i++)
variables[variableNames[i]] = cvValues[i];
double energy = energyExpression.evaluate(variables);
for (int i = 0; i < numCVs; i++) {
double dEdV = variableDerivExpressions[i].evaluate(variables);
addForcesKernel->setArg(2*i+2, cvForces[i]);
if (cc.getUseDoublePrecision())
addForcesKernel->setArg(2*i+3, dEdV);
else
addForcesKernel->setArg(2*i+3, (float) dEdV);
}
addForcesKernel->execute(numAtoms);
// Compute the energy parameter derivatives.
map<string, double>& energyParamDerivs = cc.getEnergyParamDerivWorkspace();
for (int i = 0; i < paramDerivExpressions.size(); i++)
energyParamDerivs[paramDerivNames[i]] += paramDerivExpressions[i].evaluate(variables);
for (int i = 0; i < numCVs; i++) {
double dEdV = variableDerivExpressions[i].evaluate(variables);
for (auto& deriv : cvDerivs[i])
energyParamDerivs[deriv.first] += dEdV*deriv.second;
}
return energy;
}
void CommonCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
int numAtoms = cc.getNumAtoms();
ComputeContext& cc2 = getInnerComputeContext(innerContext);
if (!hasInitializedListeners) {
hasInitializedListeners = true;
// Initialize the listeners.
ReorderListener* listener1 = new ReorderListener(cc, invAtomOrder);
ReorderListener* listener2 = new ReorderListener(cc2, innerInvAtomOrder);
cc.addReorderListener(listener1);
cc2.addReorderListener(listener2);
listener1->execute();
listener2->execute();
}
copyStateKernel->execute(numAtoms);
Vec3 a, b, c;
context.getPeriodicBoxVectors(a, b, c);
innerContext.setPeriodicBoxVectors(a, b, c);
innerContext.setTime(context.getTime());
map<string, double> innerParameters = innerContext.getParameters();
for (auto& param : innerParameters)
innerContext.setParameter(param.first, context.getParameter(param.first));
}
void CommonCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context, const CustomCVForce& force) {
// Create custom functions for the tabulated functions.
map<string, CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Replace tabulated functions in the expressions.
replaceFunctionsInExpression(functions, energyExpression);
for (auto& expression : variableDerivExpressions)
replaceFunctionsInExpression(functions, expression);
for (auto& expression : paramDerivExpressions)
replaceFunctionsInExpression(functions, expression);
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
}
void CommonIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) { void CommonIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
cc.initializeContexts(); cc.initializeContexts();
cc.setAsCurrent(); cc.setAsCurrent();
...@@ -7345,3 +7607,83 @@ void CommonApplyAndersenThermostatKernel::execute(ContextImpl& context) { ...@@ -7345,3 +7607,83 @@ void CommonApplyAndersenThermostatKernel::execute(ContextImpl& context) {
kernel->setArg(6, cc.getIntegrationUtilities().prepareRandomNumbers(cc.getPaddedNumAtoms())); kernel->setArg(6, cc.getIntegrationUtilities().prepareRandomNumbers(cc.getPaddedNumAtoms()));
kernel->execute(cc.getNumAtoms()); kernel->execute(cc.getNumAtoms());
} }
void CommonApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
savedPositions.initialize(cc, cc.getPaddedNumAtoms(), cc.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedLongForces.initialize<long long>(cc, cc.getPaddedNumAtoms()*3, "savedLongForces");
try {
cc.getFloatForceBuffer(); // This will throw an exception on the CUDA platform.
savedFloatForces.initialize(cc, cc.getPaddedNumAtoms(), cc.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces");
}
catch (...) {
// The CUDA platform doesn't have a floating point force buffer, so we don't need to copy it.
}
ComputeProgram program = cc.compileProgram(CommonKernelSources::monteCarloBarostat);
kernel = program->createKernel("scalePositions");
}
void CommonApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, double scaleX, double scaleY, double scaleZ) {
if (!hasInitializedKernels) {
hasInitializedKernels = true;
// Create the arrays with the molecule definitions.
vector<vector<int> > molecules = context.getMolecules();
numMolecules = molecules.size();
moleculeAtoms.initialize<int>(cc, cc.getNumAtoms(), "moleculeAtoms");
moleculeStartIndex.initialize<int>(cc, numMolecules+1, "moleculeStartIndex");
vector<int> atoms(moleculeAtoms.getSize());
vector<int> startIndex(moleculeStartIndex.getSize());
int index = 0;
for (int i = 0; i < numMolecules; i++) {
startIndex[i] = index;
for (int molecule : molecules[i])
atoms[index++] = molecule;
}
startIndex[numMolecules] = index;
moleculeAtoms.upload(atoms);
moleculeStartIndex.upload(startIndex);
// Initialize the kernel arguments.
//KERNEL void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize,
// real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, GLOBAL real4* RESTRICT posq,
// GLOBAL const int* RESTRICT moleculeAtoms, GLOBAL const int* RESTRICT moleculeStartIndex) {
kernel->addArg();
kernel->addArg();
kernel->addArg();
kernel->addArg(numMolecules);
for (int i = 0; i < 5; i++)
kernel->addArg();
kernel->addArg(cc.getPosq());
kernel->addArg(moleculeAtoms);
kernel->addArg(moleculeStartIndex);
}
cc.getPosq().copyTo(savedPositions);
cc.getLongForceBuffer().copyTo(savedLongForces);
if (savedFloatForces.isInitialized())
cc.getFloatForceBuffer().copyTo(savedFloatForces);
// int bytesToCopy = cc.getPosq().getSize()*(cc.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
// cc.getQueue().enqueueCopyBuffer(cc.getPosq().getDeviceBuffer(), savedPositions.getDeviceBuffer(), 0, 0, bytesToCopy);
// cc.getQueue().enqueueCopyBuffer(cc.getForce().getDeviceBuffer(), savedForces.getDeviceBuffer(), 0, 0, bytesToCopy);
kernel->setArg(0, (float) scaleX);
kernel->setArg(1, (float) scaleY);
kernel->setArg(2, (float) scaleZ);
setPeriodicBoxArgs(cc, kernel, 4);
kernel->execute(cc.getNumAtoms());
for (auto& offset : cc.getPosCellOffsets())
offset = mm_int4(0, 0, 0, 0);
lastAtomOrder = cc.getAtomIndex();
}
void CommonApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
savedPositions.copyTo(cc.getPosq());
savedLongForces.copyTo(cc.getLongForceBuffer());
if (savedFloatForces.isInitialized())
savedFloatForces.copyTo(cc.getFloatForceBuffer());
// int bytesToCopy = cc.getPosq().getSize()*(cc.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
// cc.getQueue().enqueueCopyBuffer(savedPositions.getDeviceBuffer(), cc.getPosq().getDeviceBuffer(), 0, 0, bytesToCopy);
// cc.getQueue().enqueueCopyBuffer(savedForces.getDeviceBuffer(), cc.getForce().getDeviceBuffer(), 0, 0, bytesToCopy);
}
extern "C" __global__ void applyPositionDeltas(int numAtoms, real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ posDelta) { KERNEL void applyPositionDeltas(int numAtoms, GLOBAL real4* RESTRICT posq, GLOBAL mixed4* RESTRICT posDelta
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) { #ifdef USE_MIXED_PRECISION
, GLOBAL real4* RESTRICT posqCorrection
#endif
) {
for (unsigned int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
#ifdef USE_MIXED_PRECISION #ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index]; real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index]; real4 pos2 = posqCorrection[index];
......
/** /**
* Copy the positions and velocities to the inner context. * Copy the positions and velocities to the inner context.
*/ */
extern "C" __global__ void copyState(real4* posq, real4* posqCorrection, mixed4* velm, int* __restrict__ atomOrder, KERNEL void copyState(GLOBAL real4* RESTRICT posq, GLOBAL real4* RESTRICT innerPosq,
real4* innerPosq, real4* innerPosqCorrection, mixed4* innerVelm, int* __restrict__ innerInvAtomOrder, #ifdef USE_MIXED_PRECISION
int numAtoms) { GLOBAL real4* RESTRICT posqCorrection, GLOBAL real4* RESTRICT innerPosqCorrection,
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x) { #endif
GLOBAL mixed4* RESTRICT velm, GLOBAL mixed4* RESTRICT innerVelm, GLOBAL int* RESTRICT atomOrder, GLOBAL int* RESTRICT innerInvAtomOrder, int numAtoms) {
for (int i = GLOBAL_ID; i < numAtoms; i += GLOBAL_SIZE) {
int index = innerInvAtomOrder[atomOrder[i]]; int index = innerInvAtomOrder[atomOrder[i]];
innerPosq[index] = posq[i]; innerPosq[index] = posq[i];
innerVelm[index] = velm[i]; innerVelm[index] = velm[i];
...@@ -17,9 +19,9 @@ extern "C" __global__ void copyState(real4* posq, real4* posqCorrection, mixed4* ...@@ -17,9 +19,9 @@ extern "C" __global__ void copyState(real4* posq, real4* posqCorrection, mixed4*
/** /**
* Copy the forces back to the main context. * Copy the forces back to the main context.
*/ */
extern "C" __global__ void copyForces(long long* forces, int* __restrict__ invAtomOrder, long long* innerForces, KERNEL void copyForces(GLOBAL mm_long* RESTRICT forces, GLOBAL int* RESTRICT invAtomOrder, GLOBAL mm_long* RESTRICT innerForces,
int* __restrict__ innerAtomOrder, int numAtoms, int paddedNumAtoms) { GLOBAL int* RESTRICT innerAtomOrder, int numAtoms, int paddedNumAtoms) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x) { for (int i = GLOBAL_ID; i < numAtoms; i += GLOBAL_SIZE) {
int index = invAtomOrder[innerAtomOrder[i]]; int index = invAtomOrder[innerAtomOrder[i]];
forces[index] = innerForces[i]; forces[index] = innerForces[i];
forces[index+paddedNumAtoms] = innerForces[i+paddedNumAtoms]; forces[index+paddedNumAtoms] = innerForces[i+paddedNumAtoms];
...@@ -30,9 +32,9 @@ extern "C" __global__ void copyForces(long long* forces, int* __restrict__ invAt ...@@ -30,9 +32,9 @@ extern "C" __global__ void copyForces(long long* forces, int* __restrict__ invAt
/** /**
* Add all the forces from the CVs. * Add all the forces from the CVs.
*/ */
extern "C" __global__ void addForces(long long* forces, int bufferSize KERNEL void addForces(GLOBAL mm_long* RESTRICT forces, int bufferSize
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < bufferSize; i += blockDim.x*gridDim.x) { for (int i = GLOBAL_ID; i < bufferSize; i += GLOBAL_SIZE) {
ADD_FORCES ADD_FORCES
} }
} }
__device__ real2 multofReal2(real2 a, real2 b) { DEVICE real2 multofReal2(real2 a, real2 b) {
return make_real2(a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x); return make_real2(a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x);
} }
...@@ -6,17 +6,17 @@ __device__ real2 multofReal2(real2 a, real2 b) { ...@@ -6,17 +6,17 @@ __device__ real2 multofReal2(real2 a, real2 b) {
* Precompute the cosine and sine sums which appear in each force term. * Precompute the cosine and sine sums which appear in each force term.
*/ */
extern "C" __global__ void calculateEwaldCosSinSums(mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, real2* __restrict__ cosSinSum, real4 periodicBoxSize) { KERNEL void calculateEwaldCosSinSums(GLOBAL mixed* RESTRICT energyBuffer, GLOBAL const real4* RESTRICT posq, GLOBAL real2* RESTRICT cosSinSum, real4 periodicBoxSize) {
const unsigned int ksizex = 2*KMAX_X-1; const unsigned int ksizex = 2*KMAX_X-1;
const unsigned int ksizey = 2*KMAX_Y-1; const unsigned int ksizey = 2*KMAX_Y-1;
const unsigned int ksizez = 2*KMAX_Z-1; const unsigned int ksizez = 2*KMAX_Z-1;
const unsigned int totalK = ksizex*ksizey*ksizez; const unsigned int totalK = ksizex*ksizey*ksizez;
real3 reciprocalBoxSize = make_real3(2*M_PI/periodicBoxSize.x, 2*M_PI/periodicBoxSize.y, 2*M_PI/periodicBoxSize.z); real3 reciprocalBoxSize = make_real3(2*M_PI/periodicBoxSize.x, 2*M_PI/periodicBoxSize.y, 2*M_PI/periodicBoxSize.z);
real reciprocalCoefficient = ONE_4PI_EPS0*4*M_PI/(periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z); real reciprocalCoefficient = ONE_4PI_EPS0*4*M_PI/(periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; unsigned int index = GLOBAL_ID;
mixed energy = 0; mixed energy = 0;
while (index < (KMAX_Y-1)*ksizez+KMAX_Z) while (index < (KMAX_Y-1)*ksizez+KMAX_Z)
index += blockDim.x*gridDim.x; index += GLOBAL_SIZE;
while (index < totalK) { while (index < totalK) {
// Find the wave vector (kx, ky, kz) this index corresponds to. // Find the wave vector (kx, ky, kz) this index corresponds to.
...@@ -49,9 +49,9 @@ extern "C" __global__ void calculateEwaldCosSinSums(mixed* __restrict__ energyBu ...@@ -49,9 +49,9 @@ extern "C" __global__ void calculateEwaldCosSinSums(mixed* __restrict__ energyBu
real k2 = kx*kx + ky*ky + kz*kz; real k2 = kx*kx + ky*ky + kz*kz;
real ak = EXP(k2*EXP_COEFFICIENT) / k2; real ak = EXP(k2*EXP_COEFFICIENT) / k2;
energy += reciprocalCoefficient*ak*(sum.x*sum.x + sum.y*sum.y); energy += reciprocalCoefficient*ak*(sum.x*sum.x + sum.y*sum.y);
index += blockDim.x*gridDim.x; index += GLOBAL_SIZE;
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[GLOBAL_ID] += energy;
} }
/** /**
...@@ -59,8 +59,8 @@ extern "C" __global__ void calculateEwaldCosSinSums(mixed* __restrict__ energyBu ...@@ -59,8 +59,8 @@ extern "C" __global__ void calculateEwaldCosSinSums(mixed* __restrict__ energyBu
* previous routine. * previous routine.
*/ */
extern "C" __global__ void calculateEwaldForces(unsigned long long* __restrict__ forceBuffers, const real4* __restrict__ posq, const real2* __restrict__ cosSinSum, real4 periodicBoxSize) { KERNEL void calculateEwaldForces(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL const real4* RESTRICT posq, GLOBAL const real2* RESTRICT cosSinSum, real4 periodicBoxSize) {
unsigned int atom = blockIdx.x*blockDim.x+threadIdx.x; unsigned int atom = GLOBAL_ID;
real3 reciprocalBoxSize = make_real3(2*M_PI/periodicBoxSize.x, 2*M_PI/periodicBoxSize.y, 2*M_PI/periodicBoxSize.z); real3 reciprocalBoxSize = make_real3(2*M_PI/periodicBoxSize.x, 2*M_PI/periodicBoxSize.y, 2*M_PI/periodicBoxSize.z);
real reciprocalCoefficient = ONE_4PI_EPS0*4*M_PI/(periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z); real reciprocalCoefficient = ONE_4PI_EPS0*4*M_PI/(periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
while (atom < NUM_ATOMS) { while (atom < NUM_ATOMS) {
...@@ -102,9 +102,9 @@ extern "C" __global__ void calculateEwaldForces(unsigned long long* __restrict__ ...@@ -102,9 +102,9 @@ extern "C" __global__ void calculateEwaldForces(unsigned long long* __restrict__
// Record the force on the atom. // Record the force on the atom.
atomicAdd(&forceBuffers[atom], static_cast<unsigned long long>((long long) (force.x*0x100000000))); ATOMIC_ADD(&forceBuffers[atom], (mm_ulong) ((mm_long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000))); ATOMIC_ADD(&forceBuffers[atom+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (force.z*0x100000000)));
atom += blockDim.x*gridDim.x; atom += GLOBAL_SIZE;
} }
} }
/** /**
* Scale the particle positions with each axis independent * Scale the particle positions with each axis independent.
*/ */
extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize, KERNEL void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize,
real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4* __restrict__ posq, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, GLOBAL real4* RESTRICT posq,
const int* __restrict__ moleculeAtoms, const int* __restrict__ moleculeStartIndex) { GLOBAL const int* RESTRICT moleculeAtoms, GLOBAL const int* RESTRICT moleculeStartIndex) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numMolecules; index += blockDim.x*gridDim.x) { for (int index = GLOBAL_ID; index < numMolecules; index += GLOBAL_SIZE) {
int first = moleculeStartIndex[index]; int first = moleculeStartIndex[index];
int last = moleculeStartIndex[index+1]; int last = moleculeStartIndex[index+1];
int numAtoms = last-first; int numAtoms = last-first;
...@@ -19,7 +19,7 @@ extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scal ...@@ -19,7 +19,7 @@ extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scal
center.y += pos.y; center.y += pos.y;
center.z += pos.z; center.z += pos.z;
} }
real invNumAtoms = RECIP(numAtoms); real invNumAtoms = RECIP((real) numAtoms);
center.x *= invNumAtoms; center.x *= invNumAtoms;
center.y *= invNumAtoms; center.y *= invNumAtoms;
center.z *= invNumAtoms; center.z *= invNumAtoms;
......
/** /**
* Compute the nonbonded parameters for particles and exceptions. * Compute the nonbonded parameters for particles and exceptions.
*/ */
extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, bool includeSelfEnergy, real* __restrict__ globalParams, KERNEL void computeParameters(GLOBAL mixed* RESTRICT energyBuffer, int includeSelfEnergy, GLOBAL real* RESTRICT globalParams,
int numAtoms, const float4* __restrict__ baseParticleParams, real4* __restrict__ posq, real* __restrict__ charge, int numAtoms, GLOBAL const float4* RESTRICT baseParticleParams, GLOBAL real4* RESTRICT posq, GLOBAL real* RESTRICT charge,
float2* __restrict__ sigmaEpsilon, float4* __restrict__ particleParamOffsets, int* __restrict__ particleOffsetIndices GLOBAL float2* RESTRICT sigmaEpsilon, GLOBAL float4* RESTRICT particleParamOffsets, GLOBAL int* RESTRICT particleOffsetIndices
#ifdef HAS_EXCEPTIONS #ifdef HAS_EXCEPTIONS
, int numExceptions, const float4* __restrict__ baseExceptionParams, float4* __restrict__ exceptionParams, , int numExceptions, GLOBAL const float4* RESTRICT baseExceptionParams, GLOBAL float4* RESTRICT exceptionParams,
float4* __restrict__ exceptionParamOffsets, int* __restrict__ exceptionOffsetIndices GLOBAL float4* RESTRICT exceptionParamOffsets, GLOBAL int* RESTRICT exceptionOffsetIndices
#endif #endif
) { ) {
mixed energy = 0; mixed energy = 0;
// Compute particle parameters. // Compute particle parameters.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x) { for (int i = GLOBAL_ID; i < numAtoms; i += GLOBAL_SIZE) {
float4 params = baseParticleParams[i]; float4 params = baseParticleParams[i];
#ifdef HAS_OFFSETS #ifdef HAS_OFFSETS
int start = particleOffsetIndices[i], end = particleOffsetIndices[i+1]; int start = particleOffsetIndices[i], end = particleOffsetIndices[i+1];
...@@ -45,7 +45,7 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b ...@@ -45,7 +45,7 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
// Compute exception parameters. // Compute exception parameters.
#ifdef HAS_EXCEPTIONS #ifdef HAS_EXCEPTIONS
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numExceptions; i += blockDim.x*gridDim.x) { for (int i = GLOBAL_ID; i < numExceptions; i += GLOBAL_SIZE) {
float4 params = baseExceptionParams[i]; float4 params = baseExceptionParams[i];
#ifdef HAS_OFFSETS #ifdef HAS_OFFSETS
int start = exceptionOffsetIndices[i], end = exceptionOffsetIndices[i+1]; int start = exceptionOffsetIndices[i], end = exceptionOffsetIndices[i+1];
...@@ -61,15 +61,15 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b ...@@ -61,15 +61,15 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
} }
#endif #endif
if (includeSelfEnergy) if (includeSelfEnergy)
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[GLOBAL_ID] += energy;
} }
/** /**
* Compute parameters for subtracting the reciprocal part of excluded interactions. * Compute parameters for subtracting the reciprocal part of excluded interactions.
*/ */
extern "C" __global__ void computeExclusionParameters(real4* __restrict__ posq, real* __restrict__ charge, float2* __restrict__ sigmaEpsilon, KERNEL void computeExclusionParameters(GLOBAL real4* RESTRICT posq, GLOBAL real* RESTRICT charge, GLOBAL float2* RESTRICT sigmaEpsilon,
int numExclusions, const int2* __restrict__ exclusionAtoms, float4* __restrict__ exclusionParams) { int numExclusions, GLOBAL const int2* RESTRICT exclusionAtoms, GLOBAL float4* RESTRICT exclusionParams) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numExclusions; i += blockDim.x*gridDim.x) { for (int i = GLOBAL_ID; i < numExclusions; i += GLOBAL_SIZE) {
int2 atoms = exclusionAtoms[i]; int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES #ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w; real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
......
...@@ -36,3 +36,4 @@ if (r > 0) ...@@ -36,3 +36,4 @@ if (r > 0)
delta *= tempForce*invR*invR; delta *= tempForce*invR*invR;
real3 force1 = -delta; real3 force1 = -delta;
real3 force2 = delta; real3 force2 = delta;
...@@ -196,6 +196,12 @@ public: ...@@ -196,6 +196,12 @@ public:
CudaArray& getForce() { CudaArray& getForce() {
return force; return force;
} }
/**
* The CUDA platform does not use floating point force buffers, so this throws an exception.
*/
ArrayInterface& getFloatForceBuffer() {
throw OpenMMException("CUDA platform does not use floating point force buffers");
}
/** /**
* Get the array which contains a contribution to each force represented as 64 bit fixed point. * Get the array which contains a contribution to each force represented as 64 bit fixed point.
* This is a synonym for getForce(). It exists to satisfy the ComputeContext interface. * This is a synonym for getForce(). It exists to satisfy the ComputeContext interface.
...@@ -205,7 +211,6 @@ public: ...@@ -205,7 +211,6 @@ public:
} }
/** /**
* All CUDA devices support 64 bit atomics, so this throws an exception. * All CUDA devices support 64 bit atomics, so this throws an exception.
* @return
*/ */
ArrayInterface& getForceBuffers() { ArrayInterface& getForceBuffers() {
throw OpenMMException("CUDA platform does not use floating point force buffers"); throw OpenMMException("CUDA platform does not use floating point force buffers");
......
...@@ -31,14 +31,10 @@ ...@@ -31,14 +31,10 @@
#include "CudaArray.h" #include "CudaArray.h"
#include "CudaContext.h" #include "CudaContext.h"
#include "CudaFFT3D.h" #include "CudaFFT3D.h"
#include "CudaParameterSet.h"
#include "CudaSort.h" #include "CudaSort.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/internal/CompiledExpressionSet.h" #include "openmm/common/CommonKernels.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include "lepton/ExpressionProgram.h"
#include <cufft.h> #include <cufft.h>
namespace OpenMM { namespace OpenMM {
...@@ -206,63 +202,6 @@ private: ...@@ -206,63 +202,6 @@ private:
CudaContext& cu; CudaContext& cu;
}; };
/**
* This kernel modifies the positions of particles to enforce distance constraints.
*/
class CudaApplyConstraintsKernel : public ApplyConstraintsKernel {
public:
CudaApplyConstraintsKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyConstraintsKernel(name, platform),
cu(cu), hasInitializedKernel(false) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Update particle positions to enforce constraints.
*
* @param context the context in which to execute this kernel
* @param tol the distance tolerance within which constraints must be satisfied.
*/
void apply(ContextImpl& context, double tol);
/**
* Update particle velocities to enforce constraints.
*
* @param context the context in which to execute this kernel
* @param tol the velocity tolerance within which constraints must be satisfied.
*/
void applyToVelocities(ContextImpl& context, double tol);
private:
CudaContext& cu;
bool hasInitializedKernel;
CUfunction applyDeltasKernel;
};
/**
* This kernel recomputes the positions of virtual sites.
*/
class CudaVirtualSitesKernel : public VirtualSitesKernel {
public:
CudaVirtualSitesKernel(std::string name, const Platform& platform, CudaContext& cu) : VirtualSitesKernel(name, platform), cu(cu) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Compute the virtual site locations.
*
* @param context the context in which to execute this kernel
*/
void computePositions(ContextImpl& context);
private:
CudaContext& cu;
};
/** /**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system. * This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
*/ */
...@@ -399,103 +338,13 @@ private: ...@@ -399,103 +338,13 @@ private:
/** /**
* This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system. * This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
*/ */
class CudaCalcCustomCVForceKernel : public CalcCustomCVForceKernel { class CudaCalcCustomCVForceKernel : public CommonCalcCustomCVForceKernel {
public: public:
CudaCalcCustomCVForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcCustomCVForceKernel(name, platform), CudaCalcCustomCVForceKernel(std::string name, const Platform& platform, ComputeContext& cc) : CommonCalcCustomCVForceKernel(name, platform, cc) {
cu(cu), hasInitializedListeners(false) {
} }
/** ComputeContext& getInnerComputeContext(ContextImpl& innerContext) {
* Initialize the kernel. return *reinterpret_cast<CudaPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
*
* @param system the System this kernel will be applied to
* @param force the CustomCVForce this kernel will be used for
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);
/**
* Copy state information to the inner context.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void copyState(ContextImpl& context, ContextImpl& innerContext);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomCVForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomCVForce& force);
private:
class ForceInfo;
class ReorderListener;
CudaContext& cu;
bool hasInitializedListeners;
Lepton::ExpressionProgram energyExpression;
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<CudaArray> cvForces;
CudaArray invAtomOrder;
CudaArray innerInvAtomOrder;
CUfunction copyStateKernel, copyForcesKernel, addForcesKernel;
};
/**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/
class CudaApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
public:
CudaApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyMonteCarloBarostatKernel(name, platform), cu(cu),
hasInitializedKernels(false) {
} }
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param barostat the MonteCarloBarostat this kernel will be used for
*/
void initialize(const System& system, const Force& barostat);
/**
* Attempt a Monte Carlo step, scaling particle positions (or cluster centers) by a specified value.
* This version scales the x, y, and z positions independently.
* This is called BEFORE the periodic box size is modified. It should begin by translating each particle
* or cluster into the first periodic box, so that coordinates will still be correct after the box size
* is changed.
*
* @param context the context in which to execute this kernel
* @param scaleX the scale factor by which to multiply particle x-coordinate
* @param scaleY the scale factor by which to multiply particle y-coordinate
* @param scaleZ the scale factor by which to multiply particle z-coordinate
*/
void scaleCoordinates(ContextImpl& context, double scaleX, double scaleY, double scaleZ);
/**
* Reject the most recent Monte Carlo step, restoring the particle positions to where they were before
* scaleCoordinates() was last called.
*
* @param context the context in which to execute this kernel
*/
void restoreCoordinates(ContextImpl& context);
private:
CudaContext& cu;
bool hasInitializedKernels;
int numMolecules;
CudaArray savedPositions;
CudaArray savedForces;
CudaArray moleculeAtoms;
CudaArray moleculeStartIndex;
CUfunction kernel;
std::vector<int> lastAtomOrder;
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -74,9 +74,9 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -74,9 +74,9 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
if (name == UpdateStateDataKernel::Name()) if (name == UpdateStateDataKernel::Name())
return new CudaUpdateStateDataKernel(name, platform, cu); return new CudaUpdateStateDataKernel(name, platform, cu);
if (name == ApplyConstraintsKernel::Name()) if (name == ApplyConstraintsKernel::Name())
return new CudaApplyConstraintsKernel(name, platform, cu); return new CommonApplyConstraintsKernel(name, platform, cu);
if (name == VirtualSitesKernel::Name()) if (name == VirtualSitesKernel::Name())
return new CudaVirtualSitesKernel(name, platform, cu); return new CommonVirtualSitesKernel(name, platform, cu);
if (name == CalcHarmonicBondForceKernel::Name()) if (name == CalcHarmonicBondForceKernel::Name())
return new CommonCalcHarmonicBondForceKernel(name, platform, cu, context.getSystem()); return new CommonCalcHarmonicBondForceKernel(name, platform, cu, context.getSystem());
if (name == CalcCustomBondForceKernel::Name()) if (name == CalcCustomBondForceKernel::Name())
...@@ -136,7 +136,7 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform ...@@ -136,7 +136,7 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
if (name == IntegrateNoseHooverStepKernel::Name()) if (name == IntegrateNoseHooverStepKernel::Name())
return new CommonIntegrateNoseHooverStepKernel(name, platform, cu); return new CommonIntegrateNoseHooverStepKernel(name, platform, cu);
if (name == ApplyMonteCarloBarostatKernel::Name()) if (name == ApplyMonteCarloBarostatKernel::Name())
return new CudaApplyMonteCarloBarostatKernel(name, platform, cu); return new CommonApplyMonteCarloBarostatKernel(name, platform, cu);
if (name == RemoveCMMotionKernel::Name()) if (name == RemoveCMMotionKernel::Name())
return new CommonRemoveCMMotionKernel(name, platform, cu); return new CommonRemoveCMMotionKernel(name, platform, cu);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str()); throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
......
...@@ -27,23 +27,14 @@ ...@@ -27,23 +27,14 @@
#include "CudaKernels.h" #include "CudaKernels.h"
#include "CudaForceInfo.h" #include "CudaForceInfo.h"
#include "openmm/Context.h" #include "openmm/Context.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h" #include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/OSRngSeed.h" #include "CommonKernelSources.h"
#include "CudaBondedUtilities.h" #include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h" #include "CudaExpressionUtilities.h"
#include "CudaIntegrationUtilities.h" #include "CudaIntegrationUtilities.h"
#include "CudaNonbondedUtilities.h" #include "CudaNonbondedUtilities.h"
#include "CudaKernelSources.h" #include "CudaKernelSources.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include <algorithm> #include <algorithm>
...@@ -54,7 +45,6 @@ ...@@ -54,7 +45,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
using namespace Lepton;
#define CHECK_RESULT(result, prefix) \ #define CHECK_RESULT(result, prefix) \
if (result != CUDA_SUCCESS) { \ if (result != CUDA_SUCCESS) { \
...@@ -63,40 +53,6 @@ using namespace Lepton; ...@@ -63,40 +53,6 @@ using namespace Lepton;
throw OpenMMException(m.str());\ throw OpenMMException(m.str());\
} }
static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
const Lepton::Operation& op = expression.getRootNode().getOperation();
if (op.getId() != Lepton::Operation::CONSTANT)
return false;
return (dynamic_cast<const Lepton::Operation::Constant&>(op).getValue() == 0.0);
}
static bool usesVariable(const Lepton::ExpressionTreeNode& node, const string& variable) {
const Lepton::Operation& op = node.getOperation();
if (op.getId() == Lepton::Operation::VARIABLE && op.getName() == variable)
return true;
for (auto& child : node.getChildren())
if (usesVariable(child, variable))
return true;
return false;
}
static bool usesVariable(const Lepton::ParsedExpression& expression, const string& variable) {
return usesVariable(expression.getRootNode(), variable);
}
static pair<ExpressionTreeNode, string> makeVariable(const string& name, const string& value) {
return make_pair(ExpressionTreeNode(new Operation::Variable(name)), value);
}
static void replaceFunctionsInExpression(map<string, CustomFunction*>& functions, ExpressionProgram& expression) {
for (int i = 0; i < expression.getNumOperations(); i++) {
if (expression.getOperation(i).getId() == Operation::CUSTOM) {
const Operation::Custom& op = dynamic_cast<const Operation::Custom&>(expression.getOperation(i));
expression.setOperation(i, new Operation::Custom(op.getName(), functions[op.getName()]->clone(), op.getDerivOrder()));
}
}
}
void CudaCalcForcesAndEnergyKernel::initialize(const System& system) { void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
} }
...@@ -453,38 +409,6 @@ void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& st ...@@ -453,38 +409,6 @@ void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& st
listener->execute(); listener->execute();
} }
void CudaApplyConstraintsKernel::initialize(const System& system) {
}
void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
cu.setAsCurrent();
if (!hasInitializedKernel) {
hasInitializedKernel = true;
map<string, string> defines;
CUmodule module = cu.createModule(CudaKernelSources::constraints, defines);
applyDeltasKernel = cu.getKernel(module, "applyPositionDeltas");
}
CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
cu.clearBuffer(integration.getPosDelta());
integration.applyConstraints(tol);
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
int numAtoms = cu.getNumAtoms();
void* args[] = {&numAtoms, &cu.getPosq().getDevicePointer(), &posCorrection, &cu.getIntegrationUtilities().getPosDelta().getDevicePointer()};
cu.executeKernel(applyDeltasKernel, args, cu.getNumAtoms());
integration.computeVirtualSites();
}
void CudaApplyConstraintsKernel::applyToVelocities(ContextImpl& context, double tol) {
cu.getIntegrationUtilities().applyVelocityConstraints(tol);
}
void CudaVirtualSitesKernel::initialize(const System& system) {
}
void CudaVirtualSitesKernel::computePositions(ContextImpl& context) {
cu.getIntegrationUtilities().computeVirtualSites();
}
class CudaCalcNonbondedForceKernel::ForceInfo : public CudaForceInfo { class CudaCalcNonbondedForceKernel::ForceInfo : public CudaForceInfo {
public: public:
ForceInfo(const NonbondedForce& force) : force(force) { ForceInfo(const NonbondedForce& force) : force(force) {
...@@ -760,7 +684,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -760,7 +684,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha)); replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0); replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
replacements["M_PI"] = cu.doubleToString(M_PI); replacements["M_PI"] = cu.doubleToString(M_PI);
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CommonKernelSources::ewald, replacements);
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums"); ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces"); ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
...@@ -1055,13 +979,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1055,13 +979,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0"; replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0";
if (doLJPME) if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha); replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::pmeExclusions, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup());
} }
} }
// Add the interaction to the default nonbonded kernel. // Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines); string source = cu.replaceStrings(CommonKernelSources::coulombLennardJones, defines);
charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges"); charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
baseParticleParams.initialize<float4>(cu, cu.getPaddedNumAtoms(), "baseParticleParams"); baseParticleParams.initialize<float4>(cu, cu.getPaddedNumAtoms(), "baseParticleParams");
baseParticleParams.upload(baseParticleParamVec); baseParticleParams.upload(baseParticleParamVec);
...@@ -1109,7 +1033,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1109,7 +1033,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
} }
// Initialize parameter offsets. // Initialize parameter offsets.
...@@ -1181,7 +1105,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1181,7 +1105,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize the kernel for updating parameters. // Initialize the kernel for updating parameters.
CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines); CUmodule module = cu.createModule(CommonKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters"); computeParamsKernel = cu.getKernel(module, "computeParameters");
computeExclusionParamsKernel = cu.getKernel(module, "computeExclusionParameters"); computeExclusionParamsKernel = cu.getKernel(module, "computeExclusionParameters");
info = new ForceInfo(force); info = new ForceInfo(force);
...@@ -1522,288 +1446,3 @@ void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, in ...@@ -1522,288 +1446,3 @@ void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, in
nz = dispersionGridSizeZ; nz = dispersionGridSizeZ;
} }
} }
class CudaCalcCustomCVForceKernel::ForceInfo : public CudaForceInfo {
public:
ForceInfo(ComputeForceInfo& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
return force.areParticlesIdentical(particle1, particle2);
}
int getNumParticleGroups() {
return force.getNumParticleGroups();
}
void getParticlesInGroup(int index, std::vector<int>& particles) {
force.getParticlesInGroup(index, particles);
}
bool areGroupsIdentical(int group1, int group2) {
return force.areGroupsIdentical(group1, group2);
}
private:
ComputeForceInfo& force;
};
class CudaCalcCustomCVForceKernel::ReorderListener : public CudaContext::ReorderListener {
public:
ReorderListener(CudaContext& cu, CudaArray& invAtomOrder) : cu(cu), invAtomOrder(invAtomOrder) {
}
void execute() {
vector<int> invOrder(cu.getPaddedNumAtoms());
const vector<int>& order = cu.getAtomIndex();
for (int i = 0; i < order.size(); i++)
invOrder[order[i]] = i;
invAtomOrder.upload(invOrder);
}
private:
CudaContext& cu;
CudaArray& invAtomOrder;
};
void CudaCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
int numCVs = force.getNumCollectiveVariables();
for (int i = 0; i < force.getNumGlobalParameters(); i++)
globalParameterNames.push_back(force.getGlobalParameterName(i));
for (int i = 0; i < numCVs; i++)
variableNames.push_back(force.getCollectiveVariableName(i));
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string name = force.getEnergyParameterDerivativeName(i);
paramDerivNames.push_back(name);
cu.addEnergyParameterDerivative(name);
}
// Create custom functions for the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Create the expressions.
Lepton::ParsedExpression energyExpr = Lepton::Parser::parse(force.getEnergyFunction(), functions);
energyExpression = energyExpr.createProgram();
variableDerivExpressions.clear();
for (auto& name : variableNames)
variableDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
paramDerivExpressions.clear();
for (auto& name : paramDerivNames)
paramDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
// Copy parameter derivatives from the inner context.
CudaContext& cu2 = *reinterpret_cast<CudaPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
for (auto& param : cu2.getEnergyParamDerivNames())
cu.addEnergyParameterDerivative(param);
// Create arrays for storing information.
cvForces.resize(numCVs);
for (int i = 0; i < numCVs; i++)
cvForces[i].initialize<long long>(cu, 3*cu.getPaddedNumAtoms(), "cvForce");
invAtomOrder.initialize<int>(cu, cu.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder.initialize<int>(cu, cu.getPaddedNumAtoms(), "innerInvAtomOrder");
// Create the kernels.
stringstream args, add;
for (int i = 0; i < numCVs; i++) {
args << ", long long* __restrict__ force" << i << ", real dEdV" << i;
add << "forces[i] += (long long) (force" << i << "[i]*dEdV" << i << ");\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["ADD_FORCES"] = add.str();
CUmodule module = cu.createModule(cu.replaceStrings(CudaKernelSources::vectorOps+CudaKernelSources::customCVForce, replacements));
copyStateKernel = cu.getKernel(module, "copyState");
copyForcesKernel = cu.getKernel(module, "copyForces");
addForcesKernel = cu.getKernel(module, "addForces");
// This context needs to respect all forces in the inner context when reordering atoms.
for (auto* info : cu2.getForceInfos())
cu.addForce(new ForceInfo(*info));
}
double CudaCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
copyState(context, innerContext);
int numCVs = variableNames.size();
int numAtoms = cu.getNumAtoms();
int paddedNumAtoms = cu.getPaddedNumAtoms();
CudaContext& cu2 = *reinterpret_cast<CudaPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
vector<double> cvValues;
vector<map<string, double> > cvDerivs(numCVs);
void* copyForcesArgs[] = {NULL, &invAtomOrder.getDevicePointer(), &cu2.getForce().getDevicePointer(), &cu2.getAtomIndexArray().getDevicePointer(), &numAtoms, &paddedNumAtoms};
for (int i = 0; i < numCVs; i++) {
cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
copyForcesArgs[0] = &cvForces[i].getDevicePointer();
cu.executeKernel(copyForcesKernel, copyForcesArgs, numAtoms);
innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
}
// Compute the energy and forces.
map<string, double> variables;
for (auto& name : globalParameterNames)
variables[name] = context.getParameter(name);
for (int i = 0; i < numCVs; i++)
variables[variableNames[i]] = cvValues[i];
double energy = energyExpression.evaluate(variables);
int bufferSize = cu.getForce().getSize();
vector<void*> addForcesArgs;
addForcesArgs.push_back(&cu.getForce().getDevicePointer());
addForcesArgs.push_back(&bufferSize);
vector<double> dEdV(numCVs);
vector<float> dEdVFloat(numCVs);
for (int i = 0; i < numCVs; i++) {
dEdV[i] = variableDerivExpressions[i].evaluate(variables);
dEdVFloat[i] = (float) dEdV[i];
addForcesArgs.push_back(&cvForces[i].getDevicePointer());
if (cu.getUseDoublePrecision())
addForcesArgs.push_back(&dEdV[i]);
else
addForcesArgs.push_back(&dEdVFloat[i]);
}
cu.executeKernel(addForcesKernel, &addForcesArgs[0], numAtoms);
// Compute the energy parameter derivatives.
map<string, double>& energyParamDerivs = cu.getEnergyParamDerivWorkspace();
for (int i = 0; i < paramDerivExpressions.size(); i++)
energyParamDerivs[paramDerivNames[i]] += paramDerivExpressions[i].evaluate(variables);
for (int i = 0; i < numCVs; i++) {
double dEdV = variableDerivExpressions[i].evaluate(variables);
for (auto& deriv : cvDerivs[i])
energyParamDerivs[deriv.first] += dEdV*deriv.second;
}
return energy;
}
void CudaCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
int numAtoms = cu.getNumAtoms();
CudaContext& cu2 = *reinterpret_cast<CudaPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
if (!hasInitializedListeners) {
hasInitializedListeners = true;
// Initialize the listeners.
ReorderListener* listener1 = new ReorderListener(cu, invAtomOrder);
ReorderListener* listener2 = new ReorderListener(cu2, innerInvAtomOrder);
cu.addReorderListener(listener1);
cu2.addReorderListener(listener2);
listener1->execute();
listener2->execute();
}
CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
CUdeviceptr posCorrection2 = (cu2.getUseMixedPrecision() ? cu2.getPosqCorrection().getDevicePointer() : 0);
void* copyStateArgs[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(),
&cu2.getPosq().getDevicePointer(), &posCorrection2,& cu2.getVelm().getDevicePointer(), &innerInvAtomOrder.getDevicePointer(), &numAtoms};
cu.executeKernel(copyStateKernel, copyStateArgs, numAtoms);
Vec3 a, b, c;
context.getPeriodicBoxVectors(a, b, c);
innerContext.setPeriodicBoxVectors(a, b, c);
innerContext.setTime(context.getTime());
map<string, double> innerParameters = innerContext.getParameters();
for (auto& param : innerParameters)
innerContext.setParameter(param.first, context.getParameter(param.first));
}
void CudaCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context, const CustomCVForce& force) {
// Create custom functions for the tabulated functions.
map<string, CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Replace tabulated functions in the expressions.
replaceFunctionsInExpression(functions, energyExpression);
for (auto& expression : variableDerivExpressions)
replaceFunctionsInExpression(functions, expression);
for (auto& expression : paramDerivExpressions)
replaceFunctionsInExpression(functions, expression);
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
}
void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
cu.setAsCurrent();
savedPositions.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4), "savedPositions");
savedForces.initialize<long long>(cu, cu.getPaddedNumAtoms()*3, "savedForces");
CUmodule module = cu.createModule(CudaKernelSources::monteCarloBarostat);
kernel = cu.getKernel(module, "scalePositions");
}
void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, double scaleX, double scaleY, double scaleZ) {
cu.setAsCurrent();
if (!hasInitializedKernels) {
hasInitializedKernels = true;
// Create the arrays with the molecule definitions.
vector<vector<int> > molecules = context.getMolecules();
numMolecules = molecules.size();
moleculeAtoms.initialize<int>(cu, cu.getNumAtoms(), "moleculeAtoms");
moleculeStartIndex.initialize<int>(cu, numMolecules+1, "moleculeStartIndex");
vector<int> atoms(moleculeAtoms.getSize());
vector<int> startIndex(moleculeStartIndex.getSize());
int index = 0;
for (int i = 0; i < numMolecules; i++) {
startIndex[i] = index;
for (int molecule : molecules[i])
atoms[index++] = molecule;
}
startIndex[numMolecules] = index;
moleculeAtoms.upload(atoms);
moleculeStartIndex.upload(startIndex);
// Initialize the kernel arguments.
}
int bytesToCopy = cu.getPosq().getSize()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
CUresult result = cuMemcpyDtoD(savedPositions.getDevicePointer(), cu.getPosq().getDevicePointer(), bytesToCopy);
if (result != CUDA_SUCCESS) {
std::stringstream m;
m<<"Error saving positions for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
}
result = cuMemcpyDtoD(savedForces.getDevicePointer(), cu.getForce().getDevicePointer(), savedForces.getSize()*savedForces.getElementSize());
if (result != CUDA_SUCCESS) {
std::stringstream m;
m<<"Error saving forces for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
}
float scalefX = (float) scaleX;
float scalefY = (float) scaleY;
float scalefZ = (float) scaleZ;
void* args[] = {&scalefX, &scalefY, &scalefZ, &numMolecules, cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(),
cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&cu.getPosq().getDevicePointer(), &moleculeAtoms.getDevicePointer(), &moleculeStartIndex.getDevicePointer()};
cu.executeKernel(kernel, args, cu.getNumAtoms());
for (auto& offset : cu.getPosCellOffsets())
offset = mm_int4(0, 0, 0, 0);
lastAtomOrder = cu.getAtomIndex();
}
void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
cu.setAsCurrent();
int bytesToCopy = cu.getPosq().getSize()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
CUresult result = cuMemcpyDtoD(cu.getPosq().getDevicePointer(), savedPositions.getDevicePointer(), bytesToCopy);
if (result != CUDA_SUCCESS) {
std::stringstream m;
m<<"Error restoring positions for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
}
result = cuMemcpyDtoD(cu.getForce().getDevicePointer(), savedForces.getDevicePointer(), savedForces.getSize()*savedForces.getElementSize());
if (result != CUDA_SUCCESS) {
std::stringstream m;
m<<"Error restoring forces for MC barostat: "<<cu.getErrorString(result)<<" ("<<result<<")";
throw OpenMMException(m.str());
}
}
real3 v0 = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
real3 v1 = make_real3(pos2.x-pos3.x, pos2.y-pos3.y, pos2.z-pos3.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
#endif
real3 cp = cross(v0, v1);
real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(SQRT(rp), (real) 1.0e-06f);
real r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
real r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
real dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
real cosine = min(max(dot*RSQRT(r21*r23), (real) -1), (real) 1);
real theta = ACOS(cosine);
COMPUTE_FORCE
real3 force1 = cross(v0, cp)*(dEdAngle/(r21*rp));
real3 force3 = cross(cp, v1)*(dEdAngle/(r23*rp));
real3 force2 = -force1-force3;
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE
dEdR = (r > 0) ? (dEdR / r) : 0;
delta *= dEdR;
real3 force1 = delta;
real3 force2 = -delta;
const real PI = (real) 3.14159265358979323846;
real3 v0 = make_real3(pos1.x-pos2.x, pos1.y-pos2.y, pos1.z-pos2.z);
real3 v1 = make_real3(pos3.x-pos2.x, pos3.y-pos2.y, pos3.z-pos2.z);
real3 v2 = make_real3(pos3.x-pos4.x, pos3.y-pos4.y, pos3.z-pos4.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
APPLY_PERIODIC_TO_DELTA(v2)
#endif
real3 cp0 = cross(v0, v1);
real3 cp1 = cross(v1, v2);
real cosangle = dot(normalize(cp0), normalize(cp1));
real theta;
if (cosangle > 0.99f || cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real3 cross_prod = cross(cp0, cp1);
real scale = dot(cp0, cp0)*dot(cp1, cp1);
theta = ASIN(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0)
theta = PI-theta;
}
else
theta = ACOS(cosangle);
theta = (dot(v0, cp1) >= 0 ? theta : -theta);
COMPUTE_FORCE
real normCross1 = dot(cp0, cp0);
real normSqrBC = dot(v1, v1);
real normBC = SQRT(normSqrBC);
real normCross2 = dot(cp1, cp1);
real dp = RECIP(normSqrBC);
real4 ff = make_real4((-dEdAngle*normBC)/normCross1, dot(v0, v1)*dp, dot(v2, v1)*dp, (dEdAngle*normBC)/normCross2);
real3 force1 = ff.x*cp0;
real3 force4 = ff.w*cp1;
real3 s = ff.y*force1 - ff.z*force4;
real3 force2 = s-force1;
real3 force3 = -s-force4;
...@@ -255,6 +255,13 @@ public: ...@@ -255,6 +255,13 @@ public:
OpenCLArray& getForceBuffers() { OpenCLArray& getForceBuffers() {
return forceBuffers; return forceBuffers;
} }
/**
* Get the array which contains a contribution to each force represented as a real4.
* This is a synonym for getForce(). It exists to satisfy the ComputeContext interface.
*/
ArrayInterface& getFloatForceBuffer() {
return force;
}
/** /**
* Get the array which contains a contribution to each force represented as 64 bit fixed point. * Get the array which contains a contribution to each force represented as 64 bit fixed point.
*/ */
......
...@@ -31,14 +31,10 @@ ...@@ -31,14 +31,10 @@
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLContext.h" #include "OpenCLContext.h"
#include "OpenCLFFT3D.h" #include "OpenCLFFT3D.h"
#include "OpenCLParameterSet.h"
#include "OpenCLSort.h" #include "OpenCLSort.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/CustomIntegratorUtilities.h"
#include "lepton/CompiledExpression.h"
#include "lepton/ExpressionProgram.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/common/CommonKernels.h"
namespace OpenMM { namespace OpenMM {
...@@ -180,63 +176,6 @@ private: ...@@ -180,63 +176,6 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
}; };
/**
* This kernel modifies the positions of particles to enforce distance constraints.
*/
class OpenCLApplyConstraintsKernel : public ApplyConstraintsKernel {
public:
OpenCLApplyConstraintsKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyConstraintsKernel(name, platform),
cl(cl), hasInitializedKernel(false) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Update particle positions to enforce constraints.
*
* @param context the context in which to execute this kernel
* @param tol the distance tolerance within which constraints must be satisfied.
*/
void apply(ContextImpl& context, double tol);
/**
* Update particle velocities to enforce constraints.
*
* @param context the context in which to execute this kernel
* @param tol the velocity tolerance within which constraints must be satisfied.
*/
void applyToVelocities(ContextImpl& context, double tol);
private:
OpenCLContext& cl;
bool hasInitializedKernel;
cl::Kernel applyDeltasKernel;
};
/**
* This kernel recomputes the positions of virtual sites.
*/
class OpenCLVirtualSitesKernel : public VirtualSitesKernel {
public:
OpenCLVirtualSitesKernel(std::string name, const Platform& platform, OpenCLContext& cl) : VirtualSitesKernel(name, platform), cl(cl) {
}
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* Compute the virtual site locations.
*
* @param context the context in which to execute this kernel
*/
void computePositions(ContextImpl& context);
private:
OpenCLContext& cl;
};
/** /**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system. * This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
*/ */
...@@ -376,103 +315,13 @@ private: ...@@ -376,103 +315,13 @@ private:
/** /**
* This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system. * This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
*/ */
class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel { class OpenCLCalcCustomCVForceKernel : public CommonCalcCustomCVForceKernel {
public: public:
OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform), OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, ComputeContext& cc) : CommonCalcCustomCVForceKernel(name, platform, cc) {
cl(cl), hasInitializedKernels(false) {
} }
/** ComputeContext& getInnerComputeContext(ContextImpl& innerContext) {
* Initialize the kernel. return *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
*
* @param system the System this kernel will be applied to
* @param force the CustomCVForce this kernel will be used for
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);
/**
* Copy state information to the inner context.
*
* @param context the context in which to execute this kernel
* @param innerContext the context created by the CustomCVForce for computing collective variables
*/
void copyState(ContextImpl& context, ContextImpl& innerContext);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the CustomCVForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const CustomCVForce& force);
private:
class ForceInfo;
class ReorderListener;
OpenCLContext& cl;
bool hasInitializedKernels;
Lepton::ExpressionProgram energyExpression;
std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
std::vector<OpenCLArray> cvForces;
OpenCLArray invAtomOrder;
OpenCLArray innerInvAtomOrder;
cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel;
};
/**
* This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
*/
class OpenCLApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
public:
OpenCLApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyMonteCarloBarostatKernel(name, platform), cl(cl),
hasInitializedKernels(false) {
} }
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param barostat the MonteCarloBarostat this kernel will be used for
*/
void initialize(const System& system, const Force& barostat);
/**
* Attempt a Monte Carlo step, scaling particle positions (or cluster centers) by a specified value.
* This version scales the x, y, and z positions independently.
* This is called BEFORE the periodic box size is modified. It should begin by translating each particle
* or cluster into the first periodic box, so that coordinates will still be correct after the box size
* is changed.
*
* @param context the context in which to execute this kernel
* @param scaleX the scale factor by which to multiply particle x-coordinate
* @param scaleY the scale factor by which to multiply particle y-coordinate
* @param scaleZ the scale factor by which to multiply particle z-coordinate
*/
void scaleCoordinates(ContextImpl& context, double scaleX, double scaleY, double scaleZ);
/**
* Reject the most recent Monte Carlo step, restoring the particle positions to where they were before
* scaleCoordinates() was last called.
*
* @param context the context in which to execute this kernel
*/
void restoreCoordinates(ContextImpl& context);
private:
OpenCLContext& cl;
bool hasInitializedKernels;
int numMolecules;
OpenCLArray savedPositions;
OpenCLArray savedForces;
OpenCLArray moleculeAtoms;
OpenCLArray moleculeStartIndex;
cl::Kernel kernel;
std::vector<int> lastAtomOrder;
}; };
} // namespace OpenMM } // namespace OpenMM
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment