Commit fd473eea authored by Peter Eastman's avatar Peter Eastman
Browse files

Merge branch 'master' into nucleic

parents 0a751b5b 6a985cfd
......@@ -31,6 +31,7 @@
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCentroidBondForceImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
......@@ -104,14 +105,14 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.getBondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0;
for (vector<CudaContext::ForcePostComputation*>::iterator iter = cu.getPostComputations().begin(); iter != cu.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
cu.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) {
CudaArray& energyArray = cu.getEnergyBuffer();
if (cu.getUseDoublePrecision()) {
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double* energy = (double*) cu.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
......@@ -1560,8 +1561,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
......@@ -1589,7 +1591,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
......@@ -1618,11 +1620,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (force.getNonbondedMethod() == NonbondedForce::PME) {
else if (nonbondedMethod == PME) {
// Compute the PME parameters.
int gridSizeX, gridSizeY, gridSizeZ;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
......@@ -1682,7 +1682,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
useCudaFFT = false; // We might switch back in the future, once Nvidia has all their bugs worked out
int cufftVersion;
cufftGetVersion(&cufftVersion);
useCudaFFT = (cufftVersion >= 7050); // There was a critical bug in version 7.0
if (useCudaFFT) {
cufftResult result = cufftPlan3d(&fftForward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
......@@ -1993,14 +1995,26 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Compute other values.
NonbondedForce::NonbondedMethod method = force.getNonbondedMethod();
if (method == NonbondedForce::Ewald || method == NonbondedForce::PME)
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (method == NonbondedForce::CutoffPeriodic || method == NonbondedForce::Ewald || method == NonbondedForce::PME))
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
}
void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cu.getPlatformData().useCpuPme)
cpuPme.getAs<CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = this->alpha;
nx = gridSizeX;
ny = gridSizeY;
nz = gridSizeZ;
}
}
class CudaCustomNonbondedForceInfo : public CudaForceInfo {
public:
CudaCustomNonbondedForceInfo(const CustomNonbondedForce& force) : force(force) {
......@@ -2570,8 +2584,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cutoff = force.getCutoffDistance();
string source = CudaKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));;
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));;
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));
cu.addForce(new CudaGBSAOBCForceInfo(force));
}
......@@ -3638,7 +3652,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
map<string, Lepton::CustomFunction*> customFunctions;
customFunctions["periodicdistance"] = cu.getExpressionUtilities().getPeriodicDistancePlaceholder();
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), customFunctions).optimize();
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x").optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y").optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z").optimize();
......@@ -4242,6 +4258,435 @@ void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& contex
cu.invalidateMolecules();
}
class CudaCustomCentroidBondForceInfo : public CudaForceInfo {
public:
CudaCustomCentroidBondForceInfo(const CustomCentroidBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector<int>& particles) {
vector<double> parameters;
vector<int> groups;
force.getBondParameters(index, groups, parameters);
for (int i = 0; i < groups.size(); i++) {
vector<int> groupParticles;
vector<double> weights;
force.getGroupParameters(groups[i], groupParticles, weights);
particles.insert(particles.end(), groupParticles.begin(), groupParticles.end());
}
}
bool areGroupsIdentical(int group1, int group2) {
vector<int> groups1, groups2;
vector<double> parameters1, parameters2;
force.getBondParameters(group1, groups1, parameters1);
force.getBondParameters(group2, groups2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
for (int i = 0; i < groups1.size(); i++) {
vector<int> groupParticles;
vector<double> weights1, weights2;
force.getGroupParameters(groups1[i], groupParticles, weights1);
force.getGroupParameters(groups2[i], groupParticles, weights2);
if (weights1.size() != weights2.size())
return false;
for (int j = 0; j < weights1.size(); j++)
if (weights1[j] != weights2[j])
return false;
}
return true;
}
private:
const CustomCentroidBondForce& force;
};
CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
cu.setAsCurrent();
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
cu.addForce(new CudaCustomCentroidBondForceInfo(force));
// Record the groups.
numGroups = force.getNumGroups();
vector<int> groupParticleVec;
vector<float> groupWeightVecFloat;
vector<double> groupWeightVecDouble;
vector<int> groupOffsetVec;
groupOffsetVec.push_back(0);
for (int i = 0; i < numGroups; i++) {
vector<int> particles;
vector<double> weights;
force.getGroupParameters(i, particles, weights);
groupParticleVec.insert(groupParticleVec.end(), particles.begin(), particles.end());
groupOffsetVec.push_back(groupParticleVec.size());
}
vector<vector<double> > normalizedWeights;
CustomCentroidBondForceImpl::computeNormalizedWeights(force, system, normalizedWeights);
if (cu.getUseDoublePrecision()) {
for (int i = 0; i < numGroups; i++)
groupWeightVecDouble.insert(groupWeightVecDouble.end(), normalizedWeights[i].begin(), normalizedWeights[i].end());
}
else {
for (int i = 0; i < numGroups; i++)
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
groupParticles = CudaArray::create<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles->upload(groupParticleVec);
if (cu.getUseDoublePrecision()) {
groupWeights = CudaArray::create<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecDouble);
centerPositions = CudaArray::create<double4>(cu, numGroups, "centerPositions");
}
else {
groupWeights = CudaArray::create<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecFloat);
centerPositions = CudaArray::create<float4>(cu, numGroups, "centerPositions");
}
groupOffsets = CudaArray::create<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets->upload(groupOffsetVec);
groupForces = CudaArray::create<long long>(cu, numGroups*3, "groupForces");
cu.addAutoclearBuffer(*groupForces);
// Record the bonds.
int groupsPerBond = force.getNumGroupsPerBond();
vector<int> bondGroupVec(numBonds*groupsPerBond);
params = new CudaParameterSet(cu, force.getNumPerBondParameters(), numBonds, "customCentroidBondParams");
vector<vector<float> > paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
vector<int> groups;
vector<double> parameters;
force.getBondParameters(i, groups, parameters);
for (int j = 0; j < groups.size(); j++)
bondGroupVec[i+j*numBonds] = groups[j];
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
bondGroups = CudaArray::create<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups->upload(bondGroupVec);
// Record the arguments to the force kernel.
groupForcesArgs.push_back(&groupForces->getDevicePointer());
groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet
groupForcesArgs.push_back(&centerPositions->getDevicePointer());
groupForcesArgs.push_back(&bondGroups->getDevicePointer());
// Record the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string arrayName = "table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions.back()->upload(f);
extraArgs << ", const float";
if (width > 1)
extraArgs << width;
extraArgs << "* __restrict__ " << arrayName;
groupForcesArgs.push_back(&tabulatedFunctions.back()->getDevicePointer());
}
// Record information about parameters.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
map<string, string> variables;
for (int i = 0; i < groupsPerBond; i++) {
string index = cu.intToString(i+1);
variables["x"+index] = "pos"+index+".x";
variables["y"+index] = "pos"+index+".y";
variables["z"+index] = "pos"+index+".z";
}
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals->upload(globalParamValues);
extraArgs << ", const float* __restrict__ globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
variables[name] = value;
}
groupForcesArgs.push_back(&globals->getDevicePointer());
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// and dihedrals the expression depends on.
map<string, vector<int> > distances;
map<string, vector<int> > angles;
map<string, vector<int> > dihedrals;
Lepton::ParsedExpression energyExpression = CustomCentroidBondForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
map<string, Lepton::ParsedExpression> forceExpressions;
set<string> computedDeltas;
vector<string> atomNames, posNames;
for (int i = 0; i < groupsPerBond; i++) {
string index = cu.intToString(i+1);
atomNames.push_back("P"+index);
posNames.push_back("pos"+index);
}
stringstream compute;
for (int i = 0; i < groupsPerBond; i++) {
compute<<"int group"<<(i+1)<<" = bondGroups[index+"<<(i*numBonds)<<"];\n";
compute<<"real4 pos"<<(i+1)<<" = centerPositions[group"<<(i+1)<<"];\n";
}
int index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName = atomNames[groups[0]]+atomNames[groups[1]];
if (computedDeltas.count(deltaName) == 0) {
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName);
}
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
variables[iter->first] = "r_"+deltaName;
forceExpressions["real dEdDistance"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[1]]+atomNames[groups[0]];
string deltaName2 = atomNames[groups[1]]+atomNames[groups[2]];
string angleName = "angle_"+atomNames[groups[0]]+atomNames[groups[1]]+atomNames[groups[2]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[0]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[2]]<<");\n";
computedDeltas.insert(deltaName2);
}
compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
variables[iter->first] = angleName;
forceExpressions["real dEdAngle"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[0]]+atomNames[groups[1]];
string deltaName2 = atomNames[groups[2]]+atomNames[groups[1]];
string deltaName3 = atomNames[groups[2]]+atomNames[groups[3]];
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[groups[0]]+atomNames[groups[1]]+atomNames[groups[2]]+atomNames[groups[3]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName2);
}
if (computedDeltas.count(deltaName3) == 0) {
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[3]]<<");\n";
computedDeltas.insert(deltaName3);
}
compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
compute<<"real4 "<<crossName2<<" = computeCross(delta"<<deltaName2<<", delta"<<deltaName3<<");\n";
compute<<"real "<<dihedralName<<" = computeAngle("<<crossName1<<", "<<crossName2<<");\n";
compute<<dihedralName<<" *= (delta"<<deltaName1<<".x*"<<crossName2<<".x + delta"<<deltaName1<<".y*"<<crossName2<<".y + delta"<<deltaName1<<".z*"<<crossName2<<".z < 0 ? -1 : 1);\n";
variables[iter->first] = dihedralName;
forceExpressions["real dEdDihedral"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
// Now evaluate the expressions.
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArgs<<", const "<<buffer.getType()<<"* __restrict__ globalParams"<<i;
compute<<buffer.getType()<<" bondParams"<<(i+1)<<" = globalParams"<<i<<"[index];\n";
groupForcesArgs.push_back(&buffer.getMemory());
}
forceExpressions["energy += "] = energyExpression;
compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionList, functionDefinitions, "temp");
// Finally, apply forces to groups.
vector<string> forceNames;
for (int i = 0; i < groupsPerBond; i++) {
string istr = cu.intToString(i+1);
string forceName = "force"+istr;
forceNames.push_back(forceName);
compute<<"real3 "<<forceName<<" = make_real3(0);\n";
compute<<"{\n";
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x"+istr).optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y"+istr).optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z"+istr).optimize();
map<string, Lepton::ParsedExpression> expressions;
if (!isZeroExpression(forceExpressionX))
expressions[forceName+".x -= "] = forceExpressionX;
if (!isZeroExpression(forceExpressionY))
expressions[forceName+".y -= "] = forceExpressionY;
if (!isZeroExpression(forceExpressionZ))
expressions[forceName+".z -= "] = forceExpressionZ;
if (expressions.size() > 0)
compute<<cu.getExpressionUtilities().createExpressions(expressions, variables, functionList, functionDefinitions, "coordtemp");
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName = atomNames[groups[0]]+atomNames[groups[1]];
string value = "(dEdDistance"+cu.intToString(index)+"/r_"+deltaName+")*trim(delta"+deltaName+")";
compute<<forceNames[groups[0]]<<" += "<<"-"<<value<<";\n";
compute<<forceNames[groups[1]]<<" += "<<value<<";\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[1]]+atomNames[groups[0]];
string deltaName2 = atomNames[groups[1]]+atomNames[groups[2]];
compute<<"{\n";
compute<<"real3 crossProd = cross(delta"<<deltaName2<<", delta"<<deltaName1<<");\n";
compute<<"real lengthCross = max(SQRT(dot(crossProd, crossProd)), 1e-6f);\n";
compute<<"real3 deltaCross0 = -cross(trim(delta"<<deltaName1<<"), crossProd)*dEdAngle"<<cu.intToString(index)<<"/(delta"<<deltaName1<<".w*lengthCross);\n";
compute<<"real3 deltaCross2 = cross(trim(delta"<<deltaName2<<"), crossProd)*dEdAngle"<<cu.intToString(index)<<"/(delta"<<deltaName2<<".w*lengthCross);\n";
compute<<"real3 deltaCross1 = -(deltaCross0+deltaCross2);\n";
compute<<forceNames[groups[0]]<<" += deltaCross0;\n";
compute<<forceNames[groups[1]]<<" += deltaCross1;\n";
compute<<forceNames[groups[2]]<<" += deltaCross2;\n";
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[0]]+atomNames[groups[1]];
string deltaName2 = atomNames[groups[2]]+atomNames[groups[1]];
string deltaName3 = atomNames[groups[2]]+atomNames[groups[3]];
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
compute<<"{\n";
compute<<"real r = sqrt(delta"<<deltaName2<<".w);\n";
compute<<"real4 ff;\n";
compute<<"ff.x = (-dEdDihedral"<<cu.intToString(index)<<"*r)/"<<crossName1<<".w;\n";
compute<<"ff.y = (delta"<<deltaName1<<".x*delta"<<deltaName2<<".x + delta"<<deltaName1<<".y*delta"<<deltaName2<<".y + delta"<<deltaName1<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.z = (delta"<<deltaName3<<".x*delta"<<deltaName2<<".x + delta"<<deltaName3<<".y*delta"<<deltaName2<<".y + delta"<<deltaName3<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.w = (dEdDihedral"<<cu.intToString(index)<<"*r)/"<<crossName2<<".w;\n";
compute<<"real3 internalF0 = ff.x*trim("<<crossName1<<");\n";
compute<<"real3 internalF3 = ff.w*trim("<<crossName2<<");\n";
compute<<"real3 s = ff.y*internalF0 - ff.z*internalF3;\n";
compute<<forceNames[groups[0]]<<" += internalF0;\n";
compute<<forceNames[groups[1]]<<" += s-internalF0;\n";
compute<<forceNames[groups[2]]<<" += -s-internalF3;\n";
compute<<forceNames[groups[3]]<<" += internalF3;\n";
compute<<"}\n";
}
// Save the forces to global memory.
for (int i = 0; i < groupsPerBond; i++) {
compute<<"atomicAdd(&groupForce[group"<<(i+1)<<"], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".x*0x100000000)));\n";
compute<<"atomicAdd(&groupForce[group"<<(i+1)<<"+NUM_GROUPS], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".y*0x100000000)));\n";
compute<<"atomicAdd(&groupForce[group"<<(i+1)<<"+NUM_GROUPS*2], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".z*0x100000000)));\n";
compute<<"__threadfence_block();\n";
}
map<string, string> replacements;
replacements["M_PI"] = cu.doubleToString(M_PI);
replacements["NUM_GROUPS"] = cu.intToString(numGroups);
replacements["NUM_BONDS"] = cu.intToString(numBonds);
replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
replacements["EXTRA_ARGS"] = extraArgs.str();
replacements["COMPUTE_FORCE"] = compute.str();
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customCentroidBond, replacements));
computeCentersKernel = cu.getKernel(module, "computeGroupCenters");
groupForcesKernel = cu.getKernel(module, "computeGroupForces");
applyForcesKernel = cu.getKernel(module, "applyForcesToAtoms");
}
double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(),
&groupOffsets->getDevicePointer(), &centerPositions->getDevicePointer()};
cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups);
groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer();
cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds);
void* applyForcesArgs[] = {&groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(), &groupOffsets->getDevicePointer(),
&groupForces->getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups);
return 0.0;
}
void CudaCalcCustomCentroidBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force) {
cu.setAsCurrent();
if (numBonds != force.getNumBonds())
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
if (numBonds == 0)
return;
// Record the per-bond parameters.
vector<vector<float> > paramVector(numBonds);
vector<int> particles;
vector<double> parameters;
for (int i = 0; i < numBonds; i++) {
force.getBondParameters(i, particles, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomCompoundBondForceInfo : public CudaForceInfo {
public:
CudaCustomCompoundBondForceInfo(const CustomCompoundBondForce& force) : force(force) {
......@@ -4304,7 +4749,6 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
......@@ -4507,7 +4951,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
cu.getBondedUtilities().addInteraction(atoms, compute.str(), force.getForceGroup());
map<string, string> replacements;
replacements["M_PI"] = cu.doubleToString(M_PI);
cu.getBondedUtilities().addPrefixCode(cu.replaceStrings(CudaKernelSources::customCompoundBond, replacements));;
cu.getBondedUtilities().addPrefixCode(cu.replaceStrings(CudaKernelSources::customCompoundBond, replacements));
}
double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......
......@@ -375,32 +375,38 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if (lastCutoff != kernels.cutoffDistance)
forceRebuildNeighborList = true;
context.executeKernel(kernels.findBlockBoundsKernel, &findBlockBoundsArgs[0], context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
context.executeKernel(kernels.sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms());
context.executeKernel(kernels.findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256);
forceRebuildNeighborList = false;
bool rebuild = false;
do {
context.executeKernel(kernels.findBlockBoundsKernel, &findBlockBoundsArgs[0], context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
context.executeKernel(kernels.sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms());
context.executeKernel(kernels.findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256);
forceRebuildNeighborList = false;
if (context.getComputeForceCount() == 1)
rebuild = updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
} while(rebuild);
lastCutoff = kernels.cutoffDistance;
}
void CudaNonbondedUtilities::computeInteractions(int forceGroups) {
void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
if ((forceGroups&groupFlags) == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (kernels.hasForces) {
context.executeKernel(kernels.forceKernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
if (context.getComputeForceCount() == 1)
updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
CUfunction& kernel = (includeForces ? (includeEnergy ? kernels.forceEnergyKernel : kernels.forceKernel) : kernels.energyKernel);
if (kernel == NULL)
kernel = createInteractionKernel(kernels.source, parameters, arguments, true, true, forceGroups, includeForces, includeEnergy);
context.executeKernel(kernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
}
void CudaNonbondedUtilities::updateNeighborListSize() {
bool CudaNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return;
return false;
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer();
interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) maxTiles)
return;
return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
......@@ -422,6 +428,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
forceArgs[17] = &interactingAtoms->getDevicePointer();
findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer();
forceRebuildNeighborList = true;
return true;
}
void CudaNonbondedUtilities::setUsePadding(bool padding) {
......@@ -450,8 +457,8 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels.hasForces = (source.size() > 0);
kernels.cutoffDistance = cutoff;
if (kernels.hasForces)
kernels.forceKernel = createInteractionKernel(source, parameters, arguments, true, true, groups);
kernels.source = source;
kernels.forceKernel = kernels.energyKernel = kernels.forceEnergyKernel = NULL;
if (useCutoff) {
double padding = (usePadding ? 0.1*cutoff : 0.0);
double paddedCutoff = cutoff+padding;
......@@ -474,7 +481,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels[groups] = kernels;
}
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups) {
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups, bool includeForces, bool includeEnergy) {
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source;
const string suffixes[] = {"x", "y", "z", "w"};
......@@ -650,6 +657,10 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines["USE_SYMMETRIC"] = "1";
if (useShuffle)
defines["ENABLE_SHUFFLE"] = "1";
if (includeForces)
defines["INCLUDE_FORCES"] = "1";
if (includeEnergy)
defines["INCLUDE_ENERGY"] = "1";
defines["THREAD_BLOCK_SIZE"] = context.intToString(forceThreadBlockSize);
double maxCutoff = 0.0;
for (int i = 0; i < 32; i++) {
......
......@@ -623,6 +623,10 @@ void CudaParallelCalcNonbondedForceKernel::copyParametersToContext(ContextImpl&
getKernel(i).copyParametersToContext(context, force);
}
void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz);
}
class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce,
......
......@@ -80,6 +80,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
......
/**
* Compute the center of each group.
*/
extern "C" __global__ void computeGroupCenters(const real4* __restrict__ posq, const int* __restrict__ groupParticles,
const real* __restrict__ groupWeights, const int* __restrict__ groupOffsets, real4* __restrict__ centerPositions) {
__shared__ volatile real3 temp[64];
for (int group = blockIdx.x; group < NUM_GROUPS; group += gridDim.x) {
// The threads in this block work together to compute the center one group.
int firstIndex = groupOffsets[group];
int lastIndex = groupOffsets[group+1];
real3 center = make_real3(0, 0, 0);
for (int index = threadIdx.x; index < lastIndex-firstIndex; index += blockDim.x) {
int atom = groupParticles[firstIndex+index];
real weight = groupWeights[firstIndex+index];
real4 pos = posq[atom];
center.x += weight*pos.x;
center.y += weight*pos.y;
center.z += weight*pos.z;
}
// Sum the values.
int thread = threadIdx.x;
temp[thread].x = center.x;
temp[thread].y = center.y;
temp[thread].z = center.z;
__syncthreads();
if (thread < 32) {
temp[thread].x += temp[thread+32].x;
temp[thread].y += temp[thread+32].y;
temp[thread].z += temp[thread+32].z;
if (thread < 16) {
temp[thread].x += temp[thread+16].x;
temp[thread].y += temp[thread+16].y;
temp[thread].z += temp[thread+16].z;
}
if (thread < 8) {
temp[thread].x += temp[thread+8].x;
temp[thread].y += temp[thread+8].y;
temp[thread].z += temp[thread+8].z;
}
if (thread < 4) {
temp[thread].x += temp[thread+4].x;
temp[thread].y += temp[thread+4].y;
temp[thread].z += temp[thread+4].z;
}
if (thread < 2) {
temp[thread].x += temp[thread+2].x;
temp[thread].y += temp[thread+2].y;
temp[thread].z += temp[thread+2].z;
}
}
if (thread == 0)
centerPositions[group] = make_real4(temp[0].x+temp[1].x, temp[0].y+temp[1].y, temp[0].z+temp[1].z, 0);
}
}
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline __device__ real3 trim(real4 v) {
return make_real3(v.x, v.y, v.z);
}
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline __device__ real4 delta(real4 vec1, real4 vec2) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
/**
* Compute the angle between two vectors. The w component of each vector should contain the squared magnitude.
*/
__device__ real computeAngle(real4 vec1, real4 vec2) {
real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real3 crossProduct = cross(vec1, vec2);
real scale = vec1.w*vec2.w;
angle = ASIN(SQRT(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f)
angle = M_PI-angle;
}
else
angle = ACOS(cosine);
return angle;
}
/**
* Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
*/
inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
real3 cp = cross(vec1, vec2);
return make_real4(cp.x, cp.y, cp.z, cp.x*cp.x+cp.y*cp.y+cp.z*cp.z);
}
/**
* Compute the forces on groups based on the bonds.
*/
extern "C" __global__ void computeGroupForces(unsigned long long* __restrict__ groupForce, mixed* __restrict__ energyBuffer, const real4* __restrict__ centerPositions,
const int* __restrict__ bondGroups
EXTRA_ARGS) {
mixed energy = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_BONDS; index += blockDim.x*gridDim.x) {
COMPUTE_FORCE
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/**
* Apply the forces from the group centers to the individual atoms.
*/
extern "C" __global__ void applyForcesToAtoms(const int* __restrict__ groupParticles, const real* __restrict__ groupWeights, const int* __restrict__ groupOffsets,
const long long* __restrict__ groupForce, unsigned long long* __restrict__ atomForce) {
for (int group = blockIdx.x; group < NUM_GROUPS; group += gridDim.x) {
long long fx = groupForce[group];
long long fy = groupForce[group+NUM_GROUPS];
long long fz = groupForce[group+NUM_GROUPS*2];
int firstIndex = groupOffsets[group];
int lastIndex = groupOffsets[group+1];
for (int index = threadIdx.x; index < lastIndex-firstIndex; index += blockDim.x) {
int atom = groupParticles[firstIndex+index];
real weight = groupWeights[firstIndex+index];
atomicAdd(&atomForce[atom], static_cast<unsigned long long>((long long) (fx*weight)));
atomicAdd(&atomForce[atom+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (fy*weight)));
atomicAdd(&atomForce[atom+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (fz*weight)));
}
}
}
......@@ -13,7 +13,7 @@ typedef struct {
/**
* Compute a force based on pair interactions.
*/
extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer,
extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
......@@ -27,7 +27,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
real energy = 0;
mixed energy = 0;
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
// First loop: process tiles that contain exclusions.
......
......@@ -2,9 +2,9 @@
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/
extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq
extern "C" __global__ void computePerParticleEnergy(long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq
PARAMETER_ARGUMENTS) {
real energy = 0;
mixed energy = 0;
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Load the derivatives
......
......@@ -66,12 +66,12 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
/**
* Compute forces on donors.
*/
extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ force, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq,
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
extern __shared__ real4 posBuffer[];
real energy = 0;
mixed energy = 0;
real3 f1 = make_real3(0);
real3 f2 = make_real3(0);
real3 f3 = make_real3(0);
......@@ -155,7 +155,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
/**
* Compute forces on acceptors.
*/
extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
extern "C" __global__ void computeAcceptorForces(unsigned long long* __restrict__ force, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq,
const int4* __restrict__ exclusions, const int4* __restrict__ donorAtoms, const int4* __restrict__ acceptorAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
......
......@@ -78,7 +78,7 @@ __constant__ float globals[NUM_GLOBALS];
* Compute the interaction.
*/
extern "C" __global__ void computeInteraction(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq,
unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#ifdef USE_CUTOFF
, const int* __restrict__ neighbors, const int* __restrict__ neighborStartIndex
......@@ -90,7 +90,7 @@ extern "C" __global__ void computeInteraction(
, int* __restrict__ exclusions, int* __restrict__ exclusionStartIndex
#endif
PARAMETER_ARGUMENTS) {
real energy = 0.0f;
mixed energy = 0;
// Loop over particles to be the first one in the set.
......
......@@ -9,14 +9,14 @@ typedef struct {
} AtomData;
extern "C" __global__ void computeInteractionGroups(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const int4* __restrict__ groupData,
unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const int4* __restrict__ groupData,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // global warpIndex
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
real energy = 0.0f;
mixed energy = 0;
__shared__ AtomData localData[LOCAL_MEMORY_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
......
......@@ -6,7 +6,7 @@ __device__ real2 multofReal2(real2 a, real2 b) {
* Precompute the cosine and sine sums which appear in each force term.
*/
extern "C" __global__ void calculateEwaldCosSinSums(real* __restrict__ energyBuffer, const real4* __restrict__ posq, real2* __restrict__ cosSinSum, real4 periodicBoxSize) {
extern "C" __global__ void calculateEwaldCosSinSums(mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, real2* __restrict__ cosSinSum, real4 periodicBoxSize) {
const unsigned int ksizex = 2*KMAX_X-1;
const unsigned int ksizey = 2*KMAX_Y-1;
const unsigned int ksizez = 2*KMAX_Z-1;
......@@ -14,7 +14,7 @@ extern "C" __global__ void calculateEwaldCosSinSums(real* __restrict__ energyBuf
real3 reciprocalBoxSize = make_real3(2*M_PI/periodicBoxSize.x, 2*M_PI/periodicBoxSize.y, 2*M_PI/periodicBoxSize.z);
real reciprocalCoefficient = ONE_4PI_EPS0*4*M_PI/(periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
unsigned int index = blockIdx.x*blockDim.x+threadIdx.x;
real energy = 0;
mixed energy = 0;
while (index < (KMAX_Y-1)*ksizez+KMAX_Z)
index += blockDim.x*gridDim.x;
while (index < totalK) {
......
......@@ -33,9 +33,9 @@ extern "C" __global__ void reduceBornSum(float alpha, float beta, float gamma, c
* Reduce the Born force.
*/
extern "C" __global__ void reduceBornForce(long long* __restrict__ bornForce, real* __restrict__ energyBuffer,
extern "C" __global__ void reduceBornForce(long long* __restrict__ bornForce, mixed* __restrict__ energyBuffer,
const float2* __restrict__ params, const real* __restrict__ bornRadii, const real* __restrict__ obcChain) {
real energy = 0;
mixed energy = 0;
for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// Get summed Born force
......@@ -402,7 +402,7 @@ typedef struct {
*/
extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce,
real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii,
mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii,
#ifdef USE_CUTOFF
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
......@@ -415,7 +415,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
real energy = 0;
mixed energy = 0;
__shared__ AtomData2 localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions.
......
......@@ -100,7 +100,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
*
*/
extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount,real4 periodicBoxSize, real4 invPeriodicBoxSize,
......@@ -112,7 +112,7 @@ extern "C" __global__ void computeNonbonded(
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // global warpIndex
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
real energy = 0.0f;
mixed energy = 0;
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
......@@ -175,6 +175,7 @@ extern "C" __global__ void computeNonbonded(
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.x -= delta.x*dEdR;
force.y -= delta.y*dEdR;
......@@ -184,6 +185,7 @@ extern "C" __global__ void computeNonbonded(
force.y -= dEdR1.y;
force.z -= dEdR1.z;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
......@@ -241,6 +243,7 @@ extern "C" __global__ void computeNonbonded(
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
......@@ -270,11 +273,12 @@ extern "C" __global__ void computeNonbonded(
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif // end USE_SYMMETRIC
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
// cycles the indices
// 0 1 2 3 4 5 6 7 -> 1 2 3 4 5 6 7 0
......@@ -282,6 +286,7 @@ extern "C" __global__ void computeNonbonded(
}
const unsigned int offset = y*TILE_SIZE + tgx;
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (shflForce.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (shflForce.y*0x100000000)));
......@@ -290,13 +295,16 @@ extern "C" __global__ void computeNonbonded(
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
#endif
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
......@@ -441,6 +449,7 @@ extern "C" __global__ void computeNonbonded(
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
......@@ -472,6 +481,7 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
......@@ -509,6 +519,7 @@ extern "C" __global__ void computeNonbonded(
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta *= dEdR;
force.x -= delta.x;
......@@ -540,12 +551,14 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
......@@ -565,8 +578,11 @@ extern "C" __global__ void computeNonbonded(
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
#endif
}
#endif
}
pos++;
}
#ifdef INCLUDE_ENERGY
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif
}
\ No newline at end of file
......@@ -115,7 +115,7 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern "C" __global__ void
reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ energyBuffer,
reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ energyBuffer,
const real* __restrict__ pmeBsplineModuliX, const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
......@@ -150,14 +150,14 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
extern "C" __global__ void
gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ energyBuffer,
gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ energyBuffer,
const real* __restrict__ pmeBsplineModuliX, const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ,
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
real energy = 0;
mixed energy = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
// real indices
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
......
......@@ -19,7 +19,7 @@ SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE
IF (APPLE)
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework CUDA")
ELSE (APPLE)
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}")
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}")
ENDIF (APPLE)
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
......@@ -28,7 +28,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
IF (APPLE)
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework CUDA" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
ELSE (APPLE)
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
ENDIF (APPLE)
ADD_TEST(${TEST_ROOT}Single ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "CudaPlatform.h"
#include <string>
OpenMM::CudaPlatform platform;
void initializeTests(int argc, char* argv[]) {
if (argc > 1)
platform.setPropertyDefaultValue("CudaPrecision", std::string(argv[1]));
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -29,191 +29,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of AndersenThermostat.
*/
#include "CudaTests.h"
#include "TestAndersenThermostat.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/AndersenThermostat.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
CudaPlatform platform;
void testTemperature() {
const int numParticles = 8;
const double temp = 100.0;
const double collisionFreq = 10.0;
const int numSteps = 5000;
System system;
VerletIntegrator integrator(0.003);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(2.0);
forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(forceField);
AndersenThermostat* thermstat = new AndersenThermostat(temp, collisionFreq);
system.addForce(thermstat);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; ++i)
positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
context.setPositions(positions);
context.setVelocitiesToTemperature(temp);
// Let it equilibrate.
integrator.step(10000);
// Now run it for a while and see if the temperature is correct.
double ke = 0.0;
for (int i = 0; i < numSteps; ++i) {
State state = context.getState(State::Energy);
ke += state.getKineticEnergy();
integrator.step(10);
}
ke /= numSteps;
double expected = 0.5*numParticles*3*BOLTZ*temp;
ASSERT_USUALLY_EQUAL_TOL(expected, ke, 0.1);
}
void testConstraints() {
const int numParticles = 8;
const double temp = 100.0;
const double collisionFreq = 10.0;
const int numSteps = 15000;
System system;
VerletIntegrator integrator(0.004);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(2.0);
forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(forceField);
system.addConstraint(0, 1, 1);
system.addConstraint(1, 2, 1);
system.addConstraint(2, 3, 1);
system.addConstraint(3, 0, 1);
system.addConstraint(4, 5, 1);
system.addConstraint(5, 6, 1);
system.addConstraint(6, 7, 1);
system.addConstraint(7, 4, 1);
AndersenThermostat* thermstat = new AndersenThermostat(temp, collisionFreq);
system.addForce(thermstat);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(1, 0, 0);
positions[2] = Vec3(1, 1, 0);
positions[3] = Vec3(0, 1, 0);
positions[4] = Vec3(1, 0, 1);
positions[5] = Vec3(1, 1, 1);
positions[6] = Vec3(0, 1, 1);
positions[7] = Vec3(0, 0, 1);
context.setPositions(positions);
context.setVelocitiesToTemperature(temp);
// Let it equilibrate.
integrator.step(5000);
// Now run it for a while and see if the temperature is correct.
double ke = 0.0;
for (int i = 0; i < numSteps; ++i) {
State state = context.getState(State::Energy);
ke += state.getKineticEnergy();
integrator.step(1);
}
ke /= numSteps;
double expected = 0.5*(numParticles*3-system.getNumConstraints())*BOLTZ*temp;
ASSERT_USUALLY_EQUAL_TOL(expected, ke, 0.1);
}
void testRandomSeed() {
const int numParticles = 8;
const double temp = 100.0;
const double collisionFreq = 10.0;
System system;
VerletIntegrator integrator(0.01);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(2.0);
forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(forceField);
AndersenThermostat* thermostat = new AndersenThermostat(temp, collisionFreq);
system.addForce(thermostat);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
velocities[i] = Vec3(0, 0, 0);
}
// Try twice with the same random seed.
thermostat->setRandomNumberSeed(5);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state1 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state2 = context.getState(State::Positions);
// Try twice with a different random seed.
thermostat->setRandomNumberSeed(10);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state3 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state4 = context.getState(State::Positions);
// Compare the results.
for (int i = 0; i < numParticles; i++) {
for (int j = 0; j < 3; j++) {
ASSERT(state1.getPositions()[i][j] == state2.getPositions()[i][j]);
ASSERT(state3.getPositions()[i][j] == state4.getPositions()[i][j]);
ASSERT(state1.getPositions()[i][j] != state3.getPositions()[i][j]);
}
}
}
int main(int argc, char* argv[]) {
try {
if (argc > 1)
platform.setPropertyDefaultValue("CudaPrecision", string(argv[1]));
testTemperature();
testConstraints();
testRandomSeed();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
void runPlatformTests() {
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -29,252 +29,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/System.h"
#include "CudaTests.h"
#include "TestBrownianIntegrator.h"
/**
* This tests the CUDA implementation of BrownianIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/BrownianIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
CudaPlatform platform;
void testSingleBond() {
System system;
system.addParticle(2.0);
system.addParticle(2.0);
double dt = 0.01;
BrownianIntegrator integrator(0, 0.1, dt);
HarmonicBondForce* forceField = new HarmonicBondForce();
forceField->addBond(0, 1, 1.5, 1);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(-1, 0, 0);
positions[1] = Vec3(1, 0, 0);
context.setPositions(positions);
// This is simply an overdamped harmonic oscillator, so compare it to the analytical solution.
double rate = 2*1.0/(0.1*2.0);
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions | State::Velocities);
double time = state.getTime();
double expectedDist = 1.5+0.5*std::exp(-rate*time);
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
if (i > 0) {
double expectedSpeed = -0.5*rate*std::exp(-rate*(time-0.5*dt));
ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.11);
ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.11);
}
integrator.step(1);
}
}
void testTemperature() {
const int numParticles = 8;
const int numBonds = numParticles-1;
const double temp = 10.0;
System system;
BrownianIntegrator integrator(temp, 2.0, 0.01);
HarmonicBondForce* forceField = new HarmonicBondForce();
for (int i = 0; i < numParticles; ++i)
system.addParticle(2.0);
for (int i = 0; i < numBonds; ++i)
forceField->addBond(i, i+1, 1.0, 5.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; ++i)
positions[i] = Vec3(i, 0, 0);
context.setPositions(positions);
// Let it equilibrate.
integrator.step(10000);
// Now run it for a while and see if the temperature is correct.
double pe = 0.0;
const int steps = 50000;
for (int i = 0; i < steps; ++i) {
State state = context.getState(State::Energy);
pe += state.getPotentialEnergy();
integrator.step(1);
}
pe /= steps;
double expected = 0.5*numBonds*BOLTZ*temp;
ASSERT_USUALLY_EQUAL_TOL(expected, pe, 0.1*expected);
}
void testConstraints() {
const int numParticles = 8;
const int numConstraints = 5;
const double temp = 20.0;
System system;
BrownianIntegrator integrator(temp, 2.0, 0.001);
integrator.setConstraintTolerance(1e-5);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(10.0);
forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
}
system.addConstraint(0, 1, 1.0);
system.addConstraint(1, 2, 1.0);
system.addConstraint(2, 3, 1.0);
system.addConstraint(4, 5, 1.0);
system.addConstraint(6, 7, 1.0);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3(i, 0, 0);
velocities[i] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
}
context.setPositions(positions);
context.setVelocities(velocities);
// Simulate it and see whether the constraints remain satisfied.
for (int i = 0; i < 1000; ++i) {
State state = context.getState(State::Positions);
for (int j = 0; j < numConstraints; ++j) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
Vec3 p1 = state.getPositions()[particle1];
Vec3 p2 = state.getPositions()[particle2];
double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
ASSERT_EQUAL_TOL(distance, dist, 1e-4);
}
integrator.step(1);
}
}
void testConstrainedMasslessParticles() {
System system;
system.addParticle(0.0);
system.addParticle(1.0);
system.addConstraint(0, 1, 1.5);
vector<Vec3> positions(2);
positions[0] = Vec3(-1, 0, 0);
positions[1] = Vec3(1, 0, 0);
BrownianIntegrator integrator(300.0, 2.0, 0.01);
bool failed = false;
try {
// This should throw an exception.
Context context(system, integrator, platform);
}
catch (exception& ex) {
failed = true;
}
ASSERT(failed);
// Now make both particles massless, which should work.
system.setParticleMass(1, 0.0);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocitiesToTemperature(300.0);
integrator.step(1);
State state = context.getState(State::Velocities);
ASSERT_EQUAL(0.0, state.getVelocities()[0][0]);
}
void testRandomSeed() {
const int numParticles = 8;
const double temp = 100.0;
const double collisionFreq = 10.0;
System system;
BrownianIntegrator integrator(temp, 2.0, 0.001);
NonbondedForce* forceField = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(2.0);
forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
}
system.addForce(forceField);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
for (int i = 0; i < numParticles; ++i) {
positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
velocities[i] = Vec3(0, 0, 0);
}
// Try twice with the same random seed.
integrator.setRandomNumberSeed(5);
Context context(system, integrator, platform);
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state1 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state2 = context.getState(State::Positions);
// Try twice with a different random seed.
integrator.setRandomNumberSeed(10);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state3 = context.getState(State::Positions);
context.reinitialize();
context.setPositions(positions);
context.setVelocities(velocities);
integrator.step(10);
State state4 = context.getState(State::Positions);
// Compare the results.
for (int i = 0; i < numParticles; i++) {
for (int j = 0; j < 3; j++) {
ASSERT(state1.getPositions()[i][j] == state2.getPositions()[i][j]);
ASSERT(state3.getPositions()[i][j] == state4.getPositions()[i][j]);
ASSERT(state1.getPositions()[i][j] != state3.getPositions()[i][j]);
}
}
}
int main(int argc, char* argv[]) {
try {
if (argc > 1)
platform.setPropertyDefaultValue("CudaPrecision", string(argv[1]));
testSingleBond();
testTemperature();
testConstraints();
testConstrainedMasslessParticles();
testRandomSeed();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
void runPlatformTests() {
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010-2015 Stanford University and the Authors. *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -29,149 +29,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CMAPTorsionForce.
*/
#include "CudaTests.h"
#include "TestCMAPTorsionForce.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CMAPTorsionForce.h"
#include "openmm/PeriodicTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
CudaPlatform platform;
void testCMAPTorsions() {
const int mapSize = 36;
// Create two systems: one with a pair of periodic torsions, and one with a CMAP torsion
// that approximates the same force.
System system1;
for (int i = 0; i < 5; i++)
system1.addParticle(1.0);
PeriodicTorsionForce* periodic = new PeriodicTorsionForce();
periodic->addTorsion(0, 1, 2, 3, 2, M_PI/4, 1.5);
periodic->addTorsion(1, 2, 3, 4, 3, M_PI/3, 2.0);
system1.addForce(periodic);
System system2;
for (int i = 0; i < 5; i++)
system2.addParticle(1.0);
CMAPTorsionForce* cmap = new CMAPTorsionForce();
vector<double> mapEnergy(mapSize*mapSize);
for (int i = 0; i < mapSize; i++) {
double angle1 = i*2*M_PI/mapSize;
double energy1 = 1.5*(1+cos(2*angle1-M_PI/4));
for (int j = 0; j < mapSize; j++) {
double angle2 = j*2*M_PI/mapSize;
double energy2 = 2.0*(1+cos(3*angle2-M_PI/3));
mapEnergy[i+j*mapSize] = energy1+energy2;
}
}
cmap->addMap(mapSize, mapEnergy);
cmap->addTorsion(0, 0, 1, 2, 3, 1, 2, 3, 4);
system2.addForce(cmap);
// Set the atoms in various positions, and verify that both systems give equal forces and energy.
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Vec3> positions(5);
VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01);
Context c1(system1, integrator1, platform);
Context c2(system2, integrator2, platform);
for (int i = 0; i < 50; i++) {
for (int j = 0; j < (int) positions.size(); j++)
positions[j] = Vec3(5.0*genrand_real2(sfmt), 5.0*genrand_real2(sfmt), 5.0*genrand_real2(sfmt));
c1.setPositions(positions);
c2.setPositions(positions);
State s1 = c1.getState(State::Forces | State::Energy);
State s2 = c2.getState(State::Forces | State::Energy);
for (int i = 0; i < system1.getNumParticles(); i++)
ASSERT_EQUAL_VEC(s1.getForces()[i], s2.getForces()[i], 0.05);
ASSERT_EQUAL_TOL(s1.getPotentialEnergy(), s2.getPotentialEnergy(), 1e-3);
}
}
void testChangingParameters() {
// Create a system with two maps and one torsion.
const int mapSize = 8;
System system;
for (int i = 0; i < 5; i++)
system.addParticle(1.0);
CMAPTorsionForce* cmap = new CMAPTorsionForce();
vector<double> mapEnergy1(mapSize*mapSize);
vector<double> mapEnergy2(mapSize*mapSize);
for (int i = 0; i < mapSize; i++) {
double angle1 = i*2*M_PI/mapSize;
double energy1 = cos(angle1);
for (int j = 0; j < mapSize; j++) {
double angle2 = j*2*M_PI/mapSize;
double energy2 = 10*sin(angle2);
mapEnergy1[i+j*mapSize] = energy1+energy2;
mapEnergy2[i+j*mapSize] = energy1-energy2;
}
}
cmap->addMap(mapSize, mapEnergy1);
cmap->addMap(mapSize, mapEnergy2);
cmap->addTorsion(0, 0, 1, 2, 3, 1, 2, 3, 4);
system.addForce(cmap);
// Set particle positions so angle1=0 and angle2=PI/4.
vector<Vec3> positions(5);
positions[0] = Vec3(0, 0, 1);
positions[1] = Vec3(0, 0, 0);
positions[2] = Vec3(1, 0, 0);
positions[3] = Vec3(1, 0, 1);
positions[4] = Vec3(0.5, -0.5, 1);
VerletIntegrator integrator(0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
// Check that the energy is correct.
double energy = context.getState(State::Energy).getPotentialEnergy();
ASSERT_EQUAL_TOL(1+10*sin(M_PI/4), energy, 1e-5);
// Modify the parameters.
cmap->setTorsionParameters(0, 1, 0, 1, 2, 3, 1, 2, 3, 4);
for (int i = 0; i < mapSize*mapSize; i++)
mapEnergy2[i] *= 2.0;
cmap->setMapParameters(1, mapSize, mapEnergy2);
cmap->updateParametersInContext(context);
// See if the results are correct.
energy = context.getState(State::Energy).getPotentialEnergy();
ASSERT_EQUAL_TOL(2-20*sin(M_PI/4), energy, 1e-5);
}
int main(int argc, char* argv[]) {
try {
if (argc > 1)
platform.setPropertyDefaultValue("CudaPrecision", string(argv[1]));
testCMAPTorsions();
testChangingParameters();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
void runPlatformTests() {
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment