Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
fd473eea
Commit
fd473eea
authored
Oct 29, 2015
by
Peter Eastman
Browse files
Merge branch 'master' into nucleic
parents
0a751b5b
6a985cfd
Changes
282
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
726 additions
and
639 deletions
+726
-639
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+461
-17
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+26
-15
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+4
-0
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+1
-0
platforms/cuda/src/kernels/customCentroidBond.cu
platforms/cuda/src/kernels/customCentroidBond.cu
+136
-0
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+2
-2
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
+2
-2
platforms/cuda/src/kernels/customHbondForce.cu
platforms/cuda/src/kernels/customHbondForce.cu
+3
-3
platforms/cuda/src/kernels/customManyParticle.cu
platforms/cuda/src/kernels/customManyParticle.cu
+2
-2
platforms/cuda/src/kernels/customNonbondedGroups.cu
platforms/cuda/src/kernels/customNonbondedGroups.cu
+2
-2
platforms/cuda/src/kernels/ewald.cu
platforms/cuda/src/kernels/ewald.cu
+2
-2
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+4
-4
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+21
-5
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+3
-3
platforms/cuda/staticTarget/CMakeLists.txt
platforms/cuda/staticTarget/CMakeLists.txt
+1
-1
platforms/cuda/tests/CMakeLists.txt
platforms/cuda/tests/CMakeLists.txt
+1
-1
platforms/cuda/tests/CudaTests.h
platforms/cuda/tests/CudaTests.h
+43
-0
platforms/cuda/tests/TestCudaAndersenThermostat.cpp
platforms/cuda/tests/TestCudaAndersenThermostat.cpp
+4
-187
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
+4
-248
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
+4
-145
No files found.
Too many changes to show.
To preserve performance only
282 of 282+
files are displayed.
Plain diff
Email patch
platforms/cuda/src/CudaKernels.cpp
View file @
fd473eea
...
...
@@ -31,6 +31,7 @@
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCentroidBondForceImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
...
...
@@ -104,14 +105,14 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.getBondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups
, includeForces, includeEnergy
);
double sum = 0.0;
for (vector<CudaContext::ForcePostComputation*>::iterator iter = cu.getPostComputations().begin(); iter != cu.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
cu.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) {
CudaArray& energyArray = cu.getEnergyBuffer();
if (cu.getUseDoublePrecision()) {
if (cu.getUseDoublePrecision()
|| cu.getUseMixedPrecision()
) {
double* energy = (double*) cu.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
...
...
@@ -1560,8 +1561,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
posq.upload(&temp[0]);
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
...
...
@@ -1589,7 +1591,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
if (
force.getN
onbondedMethod
()
==
NonbondedForce::
Ewald) {
if (
n
onbondedMethod == Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
...
...
@@ -1618,11 +1620,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (
force.getN
onbondedMethod
()
==
NonbondedForce::
PME) {
else if (
n
onbondedMethod == PME) {
// Compute the PME parameters.
int gridSizeX, gridSizeY, gridSizeZ;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
...
...
@@ -1682,7 +1682,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
useCudaFFT = false; // We might switch back in the future, once Nvidia has all their bugs worked out
int cufftVersion;
cufftGetVersion(&cufftVersion);
useCudaFFT = (cufftVersion >= 7050); // There was a critical bug in version 7.0
if (useCudaFFT) {
cufftResult result = cufftPlan3d(&fftForward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
...
...
@@ -1993,14 +1995,26 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Compute other values.
NonbondedForce::NonbondedMethod method = force.getNonbondedMethod();
if (method == NonbondedForce::Ewald || method == NonbondedForce::PME)
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (
method == NonbondedForce::CutoffPeriodic || method == NonbondedForce::Ewald || method == NonbondedForce::
PME))
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (
nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod ==
PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
}
void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cu.getPlatformData().useCpuPme)
cpuPme.getAs<CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = this->alpha;
nx = gridSizeX;
ny = gridSizeY;
nz = gridSizeZ;
}
}
class CudaCustomNonbondedForceInfo : public CudaForceInfo {
public:
CudaCustomNonbondedForceInfo(const CustomNonbondedForce& force) : force(force) {
...
...
@@ -2570,8 +2584,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
cutoff = force.getCutoffDistance();
string source = CudaKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));
;
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));
;
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));
cu.addForce(new CudaGBSAOBCForceInfo(force));
}
...
...
@@ -3638,7 +3652,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
map<string, Lepton::CustomFunction*> customFunctions;
customFunctions["periodicdistance"] = cu.getExpressionUtilities().getPeriodicDistancePlaceholder();
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), customFunctions).optimize();
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x").optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y").optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z").optimize();
...
...
@@ -4242,6 +4258,435 @@ void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& contex
cu.invalidateMolecules();
}
class CudaCustomCentroidBondForceInfo : public CudaForceInfo {
public:
CudaCustomCentroidBondForceInfo(const CustomCentroidBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector<int>& particles) {
vector<double> parameters;
vector<int> groups;
force.getBondParameters(index, groups, parameters);
for (int i = 0; i < groups.size(); i++) {
vector<int> groupParticles;
vector<double> weights;
force.getGroupParameters(groups[i], groupParticles, weights);
particles.insert(particles.end(), groupParticles.begin(), groupParticles.end());
}
}
bool areGroupsIdentical(int group1, int group2) {
vector<int> groups1, groups2;
vector<double> parameters1, parameters2;
force.getBondParameters(group1, groups1, parameters1);
force.getBondParameters(group2, groups2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
for (int i = 0; i < groups1.size(); i++) {
vector<int> groupParticles;
vector<double> weights1, weights2;
force.getGroupParameters(groups1[i], groupParticles, weights1);
force.getGroupParameters(groups2[i], groupParticles, weights2);
if (weights1.size() != weights2.size())
return false;
for (int j = 0; j < weights1.size(); j++)
if (weights1[j] != weights2[j])
return false;
}
return true;
}
private:
const CustomCentroidBondForce& force;
};
CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel() {
cu.setAsCurrent();
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
cu.setAsCurrent();
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
cu.addForce(new CudaCustomCentroidBondForceInfo(force));
// Record the groups.
numGroups = force.getNumGroups();
vector<int> groupParticleVec;
vector<float> groupWeightVecFloat;
vector<double> groupWeightVecDouble;
vector<int> groupOffsetVec;
groupOffsetVec.push_back(0);
for (int i = 0; i < numGroups; i++) {
vector<int> particles;
vector<double> weights;
force.getGroupParameters(i, particles, weights);
groupParticleVec.insert(groupParticleVec.end(), particles.begin(), particles.end());
groupOffsetVec.push_back(groupParticleVec.size());
}
vector<vector<double> > normalizedWeights;
CustomCentroidBondForceImpl::computeNormalizedWeights(force, system, normalizedWeights);
if (cu.getUseDoublePrecision()) {
for (int i = 0; i < numGroups; i++)
groupWeightVecDouble.insert(groupWeightVecDouble.end(), normalizedWeights[i].begin(), normalizedWeights[i].end());
}
else {
for (int i = 0; i < numGroups; i++)
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
groupParticles = CudaArray::create<int>(cu, groupParticleVec.size(), "groupParticles");
groupParticles->upload(groupParticleVec);
if (cu.getUseDoublePrecision()) {
groupWeights = CudaArray::create<double>(cu, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecDouble);
centerPositions = CudaArray::create<double4>(cu, numGroups, "centerPositions");
}
else {
groupWeights = CudaArray::create<float>(cu, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecFloat);
centerPositions = CudaArray::create<float4>(cu, numGroups, "centerPositions");
}
groupOffsets = CudaArray::create<int>(cu, groupOffsetVec.size(), "groupOffsets");
groupOffsets->upload(groupOffsetVec);
groupForces = CudaArray::create<long long>(cu, numGroups*3, "groupForces");
cu.addAutoclearBuffer(*groupForces);
// Record the bonds.
int groupsPerBond = force.getNumGroupsPerBond();
vector<int> bondGroupVec(numBonds*groupsPerBond);
params = new CudaParameterSet(cu, force.getNumPerBondParameters(), numBonds, "customCentroidBondParams");
vector<vector<float> > paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
vector<int> groups;
vector<double> parameters;
force.getBondParameters(i, groups, parameters);
for (int j = 0; j < groups.size(); j++)
bondGroupVec[i+j*numBonds] = groups[j];
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
bondGroups = CudaArray::create<int>(cu, bondGroupVec.size(), "bondGroups");
bondGroups->upload(bondGroupVec);
// Record the arguments to the force kernel.
groupForcesArgs.push_back(&groupForces->getDevicePointer());
groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet
groupForcesArgs.push_back(¢erPositions->getDevicePointer());
groupForcesArgs.push_back(&bondGroups->getDevicePointer());
// Record the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string arrayName = "table"+cu.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
tabulatedFunctions.back()->upload(f);
extraArgs << ", const float";
if (width > 1)
extraArgs << width;
extraArgs << "* __restrict__ " << arrayName;
groupForcesArgs.push_back(&tabulatedFunctions.back()->getDevicePointer());
}
// Record information about parameters.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
map<string, string> variables;
for (int i = 0; i < groupsPerBond; i++) {
string index = cu.intToString(i+1);
variables["x"+index] = "pos"+index+".x";
variables["y"+index] = "pos"+index+".y";
variables["z"+index] = "pos"+index+".z";
}
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals->upload(globalParamValues);
extraArgs << ", const float* __restrict__ globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cu.intToString(i)+"]";
variables[name] = value;
}
groupForcesArgs.push_back(&globals->getDevicePointer());
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// and dihedrals the expression depends on.
map<string, vector<int> > distances;
map<string, vector<int> > angles;
map<string, vector<int> > dihedrals;
Lepton::ParsedExpression energyExpression = CustomCentroidBondForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
map<string, Lepton::ParsedExpression> forceExpressions;
set<string> computedDeltas;
vector<string> atomNames, posNames;
for (int i = 0; i < groupsPerBond; i++) {
string index = cu.intToString(i+1);
atomNames.push_back("P"+index);
posNames.push_back("pos"+index);
}
stringstream compute;
for (int i = 0; i < groupsPerBond; i++) {
compute<<"int group"<<(i+1)<<" = bondGroups[index+"<<(i*numBonds)<<"];\n";
compute<<"real4 pos"<<(i+1)<<" = centerPositions[group"<<(i+1)<<"];\n";
}
int index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName = atomNames[groups[0]]+atomNames[groups[1]];
if (computedDeltas.count(deltaName) == 0) {
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName);
}
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
variables[iter->first] = "r_"+deltaName;
forceExpressions["real dEdDistance"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[1]]+atomNames[groups[0]];
string deltaName2 = atomNames[groups[1]]+atomNames[groups[2]];
string angleName = "angle_"+atomNames[groups[0]]+atomNames[groups[1]]+atomNames[groups[2]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[0]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[2]]<<");\n";
computedDeltas.insert(deltaName2);
}
compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
variables[iter->first] = angleName;
forceExpressions["real dEdAngle"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[0]]+atomNames[groups[1]];
string deltaName2 = atomNames[groups[2]]+atomNames[groups[1]];
string deltaName3 = atomNames[groups[2]]+atomNames[groups[3]];
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[groups[0]]+atomNames[groups[1]]+atomNames[groups[2]]+atomNames[groups[3]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName2);
}
if (computedDeltas.count(deltaName3) == 0) {
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[3]]<<");\n";
computedDeltas.insert(deltaName3);
}
compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
compute<<"real4 "<<crossName2<<" = computeCross(delta"<<deltaName2<<", delta"<<deltaName3<<");\n";
compute<<"real "<<dihedralName<<" = computeAngle("<<crossName1<<", "<<crossName2<<");\n";
compute<<dihedralName<<" *= (delta"<<deltaName1<<".x*"<<crossName2<<".x + delta"<<deltaName1<<".y*"<<crossName2<<".y + delta"<<deltaName1<<".z*"<<crossName2<<".z < 0 ? -1 : 1);\n";
variables[iter->first] = dihedralName;
forceExpressions["real dEdDihedral"+cu.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
// Now evaluate the expressions.
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArgs<<", const "<<buffer.getType()<<"* __restrict__ globalParams"<<i;
compute<<buffer.getType()<<" bondParams"<<(i+1)<<" = globalParams"<<i<<"[index];\n";
groupForcesArgs.push_back(&buffer.getMemory());
}
forceExpressions["energy += "] = energyExpression;
compute << cu.getExpressionUtilities().createExpressions(forceExpressions, variables, functionList, functionDefinitions, "temp");
// Finally, apply forces to groups.
vector<string> forceNames;
for (int i = 0; i < groupsPerBond; i++) {
string istr = cu.intToString(i+1);
string forceName = "force"+istr;
forceNames.push_back(forceName);
compute<<"real3 "<<forceName<<" = make_real3(0);\n";
compute<<"{\n";
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x"+istr).optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y"+istr).optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z"+istr).optimize();
map<string, Lepton::ParsedExpression> expressions;
if (!isZeroExpression(forceExpressionX))
expressions[forceName+".x -= "] = forceExpressionX;
if (!isZeroExpression(forceExpressionY))
expressions[forceName+".y -= "] = forceExpressionY;
if (!isZeroExpression(forceExpressionZ))
expressions[forceName+".z -= "] = forceExpressionZ;
if (expressions.size() > 0)
compute<<cu.getExpressionUtilities().createExpressions(expressions, variables, functionList, functionDefinitions, "coordtemp");
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName = atomNames[groups[0]]+atomNames[groups[1]];
string value = "(dEdDistance"+cu.intToString(index)+"/r_"+deltaName+")*trim(delta"+deltaName+")";
compute<<forceNames[groups[0]]<<" += "<<"-"<<value<<";\n";
compute<<forceNames[groups[1]]<<" += "<<value<<";\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[1]]+atomNames[groups[0]];
string deltaName2 = atomNames[groups[1]]+atomNames[groups[2]];
compute<<"{\n";
compute<<"real3 crossProd = cross(delta"<<deltaName2<<", delta"<<deltaName1<<");\n";
compute<<"real lengthCross = max(SQRT(dot(crossProd, crossProd)), 1e-6f);\n";
compute<<"real3 deltaCross0 = -cross(trim(delta"<<deltaName1<<"), crossProd)*dEdAngle"<<cu.intToString(index)<<"/(delta"<<deltaName1<<".w*lengthCross);\n";
compute<<"real3 deltaCross2 = cross(trim(delta"<<deltaName2<<"), crossProd)*dEdAngle"<<cu.intToString(index)<<"/(delta"<<deltaName2<<".w*lengthCross);\n";
compute<<"real3 deltaCross1 = -(deltaCross0+deltaCross2);\n";
compute<<forceNames[groups[0]]<<" += deltaCross0;\n";
compute<<forceNames[groups[1]]<<" += deltaCross1;\n";
compute<<forceNames[groups[2]]<<" += deltaCross2;\n";
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[0]]+atomNames[groups[1]];
string deltaName2 = atomNames[groups[2]]+atomNames[groups[1]];
string deltaName3 = atomNames[groups[2]]+atomNames[groups[3]];
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
compute<<"{\n";
compute<<"real r = sqrt(delta"<<deltaName2<<".w);\n";
compute<<"real4 ff;\n";
compute<<"ff.x = (-dEdDihedral"<<cu.intToString(index)<<"*r)/"<<crossName1<<".w;\n";
compute<<"ff.y = (delta"<<deltaName1<<".x*delta"<<deltaName2<<".x + delta"<<deltaName1<<".y*delta"<<deltaName2<<".y + delta"<<deltaName1<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.z = (delta"<<deltaName3<<".x*delta"<<deltaName2<<".x + delta"<<deltaName3<<".y*delta"<<deltaName2<<".y + delta"<<deltaName3<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.w = (dEdDihedral"<<cu.intToString(index)<<"*r)/"<<crossName2<<".w;\n";
compute<<"real3 internalF0 = ff.x*trim("<<crossName1<<");\n";
compute<<"real3 internalF3 = ff.w*trim("<<crossName2<<");\n";
compute<<"real3 s = ff.y*internalF0 - ff.z*internalF3;\n";
compute<<forceNames[groups[0]]<<" += internalF0;\n";
compute<<forceNames[groups[1]]<<" += s-internalF0;\n";
compute<<forceNames[groups[2]]<<" += -s-internalF3;\n";
compute<<forceNames[groups[3]]<<" += internalF3;\n";
compute<<"}\n";
}
// Save the forces to global memory.
for (int i = 0; i < groupsPerBond; i++) {
compute<<"atomicAdd(&groupForce[group"<<(i+1)<<"], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".x*0x100000000)));\n";
compute<<"atomicAdd(&groupForce[group"<<(i+1)<<"+NUM_GROUPS], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".y*0x100000000)));\n";
compute<<"atomicAdd(&groupForce[group"<<(i+1)<<"+NUM_GROUPS*2], static_cast<unsigned long long>((long long) (force"<<(i+1)<<".z*0x100000000)));\n";
compute<<"__threadfence_block();\n";
}
map<string, string> replacements;
replacements["M_PI"] = cu.doubleToString(M_PI);
replacements["NUM_GROUPS"] = cu.intToString(numGroups);
replacements["NUM_BONDS"] = cu.intToString(numBonds);
replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
replacements["EXTRA_ARGS"] = extraArgs.str();
replacements["COMPUTE_FORCE"] = compute.str();
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customCentroidBond, replacements));
computeCentersKernel = cu.getKernel(module, "computeGroupCenters");
groupForcesKernel = cu.getKernel(module, "computeGroupForces");
applyForcesKernel = cu.getKernel(module, "applyForcesToAtoms");
}
double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(),
&groupOffsets->getDevicePointer(), ¢erPositions->getDevicePointer()};
cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups);
groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer();
cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds);
void* applyForcesArgs[] = {&groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(), &groupOffsets->getDevicePointer(),
&groupForces->getDevicePointer(), &cu.getForce().getDevicePointer()};
cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups);
return 0.0;
}
void CudaCalcCustomCentroidBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force) {
cu.setAsCurrent();
if (numBonds != force.getNumBonds())
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
if (numBonds == 0)
return;
// Record the per-bond parameters.
vector<vector<float> > paramVector(numBonds);
vector<int> particles;
vector<double> parameters;
for (int i = 0; i < numBonds; i++) {
force.getBondParameters(i, particles, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cu.invalidateMolecules();
}
class CudaCustomCompoundBondForceInfo : public CudaForceInfo {
public:
CudaCustomCompoundBondForceInfo(const CustomCompoundBondForce& force) : force(force) {
...
...
@@ -4304,7 +4749,6 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream tableArgs;
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
...
...
@@ -4507,7 +4951,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
cu.getBondedUtilities().addInteraction(atoms, compute.str(), force.getForceGroup());
map<string, string> replacements;
replacements["M_PI"] = cu.doubleToString(M_PI);
cu.getBondedUtilities().addPrefixCode(cu.replaceStrings(CudaKernelSources::customCompoundBond, replacements));
;
cu.getBondedUtilities().addPrefixCode(cu.replaceStrings(CudaKernelSources::customCompoundBond, replacements));
}
double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
fd473eea
...
...
@@ -375,32 +375,38 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
bool
rebuild
=
false
;
do
{
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
}
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
)
{
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
kernels
.
hasForces
)
{
context
.
executeKernel
(
kernels
.
forceKernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
if
(
context
.
getComputeForceCount
()
==
1
)
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
CUfunction
&
kernel
=
(
includeForces
?
(
includeEnergy
?
kernels
.
forceEnergyKernel
:
kernels
.
forceKernel
)
:
kernels
.
energyKernel
);
if
(
kernel
==
NULL
)
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
context
.
executeKernel
(
kernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
void
CudaNonbondedUtilities
::
updateNeighborListSize
()
{
bool
CudaNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
;
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
maxTiles
)
return
;
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
...
...
@@ -422,6 +428,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
forceRebuildNeighborList
=
true
;
return
true
;
}
void
CudaNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
...
@@ -450,8 +457,8 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
cutoffDistance
=
cutoff
;
if
(
kernels
.
hasForces
)
kernels
.
forceKernel
=
createInteractionKernel
(
source
,
parameters
,
arguments
,
true
,
true
,
groups
)
;
kernels
.
source
=
source
;
kernels
.
forceKernel
=
kernels
.
energyKernel
=
kernels
.
forceEnergyKernel
=
NULL
;
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
...
...
@@ -474,7 +481,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels
[
groups
]
=
kernels
;
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
)
{
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
)
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
...
@@ -650,6 +657,10 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useShuffle
)
defines
[
"ENABLE_SHUFFLE"
]
=
"1"
;
if
(
includeForces
)
defines
[
"INCLUDE_FORCES"
]
=
"1"
;
if
(
includeEnergy
)
defines
[
"INCLUDE_ENERGY"
]
=
"1"
;
defines
[
"THREAD_BLOCK_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
double
maxCutoff
=
0.0
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
fd473eea
...
...
@@ -623,6 +623,10 @@ void CudaParallelCalcNonbondedForceKernel::copyParametersToContext(ContextImpl&
getKernel
(
i
).
copyParametersToContext
(
context
,
force
);
}
void
CudaParallelCalcNonbondedForceKernel
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
CudaCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
class
CudaParallelCalcCustomNonbondedForceKernel
::
Task
:
public
CudaContext
::
WorkTask
{
public:
Task
(
ContextImpl
&
context
,
CudaCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
fd473eea
...
...
@@ -80,6 +80,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory
(
CalcCustomGBForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomExternalForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
...
...
platforms/cuda/src/kernels/customCentroidBond.cu
0 → 100644
View file @
fd473eea
/**
* Compute the center of each group.
*/
extern
"C"
__global__
void
computeGroupCenters
(
const
real4
*
__restrict__
posq
,
const
int
*
__restrict__
groupParticles
,
const
real
*
__restrict__
groupWeights
,
const
int
*
__restrict__
groupOffsets
,
real4
*
__restrict__
centerPositions
)
{
__shared__
volatile
real3
temp
[
64
];
for
(
int
group
=
blockIdx
.
x
;
group
<
NUM_GROUPS
;
group
+=
gridDim
.
x
)
{
// The threads in this block work together to compute the center one group.
int
firstIndex
=
groupOffsets
[
group
];
int
lastIndex
=
groupOffsets
[
group
+
1
];
real3
center
=
make_real3
(
0
,
0
,
0
);
for
(
int
index
=
threadIdx
.
x
;
index
<
lastIndex
-
firstIndex
;
index
+=
blockDim
.
x
)
{
int
atom
=
groupParticles
[
firstIndex
+
index
];
real
weight
=
groupWeights
[
firstIndex
+
index
];
real4
pos
=
posq
[
atom
];
center
.
x
+=
weight
*
pos
.
x
;
center
.
y
+=
weight
*
pos
.
y
;
center
.
z
+=
weight
*
pos
.
z
;
}
// Sum the values.
int
thread
=
threadIdx
.
x
;
temp
[
thread
].
x
=
center
.
x
;
temp
[
thread
].
y
=
center
.
y
;
temp
[
thread
].
z
=
center
.
z
;
__syncthreads
();
if
(
thread
<
32
)
{
temp
[
thread
].
x
+=
temp
[
thread
+
32
].
x
;
temp
[
thread
].
y
+=
temp
[
thread
+
32
].
y
;
temp
[
thread
].
z
+=
temp
[
thread
+
32
].
z
;
if
(
thread
<
16
)
{
temp
[
thread
].
x
+=
temp
[
thread
+
16
].
x
;
temp
[
thread
].
y
+=
temp
[
thread
+
16
].
y
;
temp
[
thread
].
z
+=
temp
[
thread
+
16
].
z
;
}
if
(
thread
<
8
)
{
temp
[
thread
].
x
+=
temp
[
thread
+
8
].
x
;
temp
[
thread
].
y
+=
temp
[
thread
+
8
].
y
;
temp
[
thread
].
z
+=
temp
[
thread
+
8
].
z
;
}
if
(
thread
<
4
)
{
temp
[
thread
].
x
+=
temp
[
thread
+
4
].
x
;
temp
[
thread
].
y
+=
temp
[
thread
+
4
].
y
;
temp
[
thread
].
z
+=
temp
[
thread
+
4
].
z
;
}
if
(
thread
<
2
)
{
temp
[
thread
].
x
+=
temp
[
thread
+
2
].
x
;
temp
[
thread
].
y
+=
temp
[
thread
+
2
].
y
;
temp
[
thread
].
z
+=
temp
[
thread
+
2
].
z
;
}
}
if
(
thread
==
0
)
centerPositions
[
group
]
=
make_real4
(
temp
[
0
].
x
+
temp
[
1
].
x
,
temp
[
0
].
y
+
temp
[
1
].
y
,
temp
[
0
].
z
+
temp
[
1
].
z
,
0
);
}
}
/**
* Convert a real4 to a real3 by removing its last element.
*/
inline
__device__
real3
trim
(
real4
v
)
{
return
make_real3
(
v
.
x
,
v
.
y
,
v
.
z
);
}
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
}
/**
* Compute the angle between two vectors. The w component of each vector should contain the squared magnitude.
*/
__device__
real
computeAngle
(
real4
vec1
,
real4
vec2
)
{
real
dotProduct
=
vec1
.
x
*
vec2
.
x
+
vec1
.
y
*
vec2
.
y
+
vec1
.
z
*
vec2
.
z
;
real
cosine
=
dotProduct
*
RSQRT
(
vec1
.
w
*
vec2
.
w
);
real
angle
;
if
(
cosine
>
0.99
f
||
cosine
<
-
0.99
f
)
{
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real3
crossProduct
=
cross
(
vec1
,
vec2
);
real
scale
=
vec1
.
w
*
vec2
.
w
;
angle
=
ASIN
(
SQRT
(
dot
(
crossProduct
,
crossProduct
)
/
scale
));
if
(
cosine
<
0.0
f
)
angle
=
M_PI
-
angle
;
}
else
angle
=
ACOS
(
cosine
);
return
angle
;
}
/**
* Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
*/
inline
__device__
real4
computeCross
(
real4
vec1
,
real4
vec2
)
{
real3
cp
=
cross
(
vec1
,
vec2
);
return
make_real4
(
cp
.
x
,
cp
.
y
,
cp
.
z
,
cp
.
x
*
cp
.
x
+
cp
.
y
*
cp
.
y
+
cp
.
z
*
cp
.
z
);
}
/**
* Compute the forces on groups based on the bonds.
*/
extern
"C"
__global__
void
computeGroupForces
(
unsigned
long
long
*
__restrict__
groupForce
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
centerPositions
,
const
int
*
__restrict__
bondGroups
EXTRA_ARGS
)
{
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_BONDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
COMPUTE_FORCE
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
/**
* Apply the forces from the group centers to the individual atoms.
*/
extern
"C"
__global__
void
applyForcesToAtoms
(
const
int
*
__restrict__
groupParticles
,
const
real
*
__restrict__
groupWeights
,
const
int
*
__restrict__
groupOffsets
,
const
long
long
*
__restrict__
groupForce
,
unsigned
long
long
*
__restrict__
atomForce
)
{
for
(
int
group
=
blockIdx
.
x
;
group
<
NUM_GROUPS
;
group
+=
gridDim
.
x
)
{
long
long
fx
=
groupForce
[
group
];
long
long
fy
=
groupForce
[
group
+
NUM_GROUPS
];
long
long
fz
=
groupForce
[
group
+
NUM_GROUPS
*
2
];
int
firstIndex
=
groupOffsets
[
group
];
int
lastIndex
=
groupOffsets
[
group
+
1
];
for
(
int
index
=
threadIdx
.
x
;
index
<
lastIndex
-
firstIndex
;
index
+=
blockDim
.
x
)
{
int
atom
=
groupParticles
[
firstIndex
+
index
];
real
weight
=
groupWeights
[
firstIndex
+
index
];
atomicAdd
(
&
atomForce
[
atom
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
fx
*
weight
)));
atomicAdd
(
&
atomForce
[
atom
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
fy
*
weight
)));
atomicAdd
(
&
atomForce
[
atom
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
fz
*
weight
)));
}
}
}
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
fd473eea
...
...
@@ -13,7 +13,7 @@ typedef struct {
/**
* Compute a force based on pair interactions.
*/
extern
"C"
__global__
void
computeN2Energy
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
extern
"C"
__global__
void
computeN2Energy
(
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
...
...
@@ -27,7 +27,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
// First loop: process tiles that contain exclusions.
...
...
platforms/cuda/src/kernels/customGBEnergyPerParticle.cu
View file @
fd473eea
...
...
@@ -2,9 +2,9 @@
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/
extern
"C"
__global__
void
computePerParticleEnergy
(
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
extern
"C"
__global__
void
computePerParticleEnergy
(
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Load the derivatives
...
...
platforms/cuda/src/kernels/customHbondForce.cu
View file @
fd473eea
...
...
@@ -66,12 +66,12 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
/**
* Compute forces on donors.
*/
extern
"C"
__global__
void
computeDonorForces
(
unsigned
long
long
*
__restrict__
force
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
computeDonorForces
(
unsigned
long
long
*
__restrict__
force
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
exclusions
,
const
int4
*
__restrict__
donorAtoms
,
const
int4
*
__restrict__
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
extern
__shared__
real4
posBuffer
[];
real
energy
=
0
;
mixed
energy
=
0
;
real3
f1
=
make_real3
(
0
);
real3
f2
=
make_real3
(
0
);
real3
f3
=
make_real3
(
0
);
...
...
@@ -155,7 +155,7 @@ extern "C" __global__ void computeDonorForces(unsigned long long* __restrict__ f
/**
* Compute forces on acceptors.
*/
extern
"C"
__global__
void
computeAcceptorForces
(
unsigned
long
long
*
__restrict__
force
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
computeAcceptorForces
(
unsigned
long
long
*
__restrict__
force
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
exclusions
,
const
int4
*
__restrict__
donorAtoms
,
const
int4
*
__restrict__
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
...
...
platforms/cuda/src/kernels/customManyParticle.cu
View file @
fd473eea
...
...
@@ -78,7 +78,7 @@ __constant__ float globals[NUM_GLOBALS];
* Compute the interaction.
*/
extern
"C"
__global__
void
computeInteraction
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
neighbors
,
const
int
*
__restrict__
neighborStartIndex
...
...
@@ -90,7 +90,7 @@ extern "C" __global__ void computeInteraction(
,
int
*
__restrict__
exclusions
,
int
*
__restrict__
exclusionStartIndex
#endif
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
.0
f
;
mixed
energy
=
0
;
// Loop over particles to be the first one in the set.
...
...
platforms/cuda/src/kernels/customNonbondedGroups.cu
View file @
fd473eea
...
...
@@ -9,14 +9,14 @@ typedef struct {
}
AtomData
;
extern
"C"
__global__
void
computeInteractionGroups
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
real
energy
=
0
.0
f
;
mixed
energy
=
0
;
__shared__
AtomData
localData
[
LOCAL_MEMORY_SIZE
];
const
unsigned
int
startTile
=
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
...
...
platforms/cuda/src/kernels/ewald.cu
View file @
fd473eea
...
...
@@ -6,7 +6,7 @@ __device__ real2 multofReal2(real2 a, real2 b) {
* Precompute the cosine and sine sums which appear in each force term.
*/
extern
"C"
__global__
void
calculateEwaldCosSinSums
(
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
real2
*
__restrict__
cosSinSum
,
real4
periodicBoxSize
)
{
extern
"C"
__global__
void
calculateEwaldCosSinSums
(
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
real2
*
__restrict__
cosSinSum
,
real4
periodicBoxSize
)
{
const
unsigned
int
ksizex
=
2
*
KMAX_X
-
1
;
const
unsigned
int
ksizey
=
2
*
KMAX_Y
-
1
;
const
unsigned
int
ksizez
=
2
*
KMAX_Z
-
1
;
...
...
@@ -14,7 +14,7 @@ extern "C" __global__ void calculateEwaldCosSinSums(real* __restrict__ energyBuf
real3
reciprocalBoxSize
=
make_real3
(
2
*
M_PI
/
periodicBoxSize
.
x
,
2
*
M_PI
/
periodicBoxSize
.
y
,
2
*
M_PI
/
periodicBoxSize
.
z
);
real
reciprocalCoefficient
=
ONE_4PI_EPS0
*
4
*
M_PI
/
(
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
real
energy
=
0
;
mixed
energy
=
0
;
while
(
index
<
(
KMAX_Y
-
1
)
*
ksizez
+
KMAX_Z
)
index
+=
blockDim
.
x
*
gridDim
.
x
;
while
(
index
<
totalK
)
{
...
...
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
fd473eea
...
...
@@ -33,9 +33,9 @@ extern "C" __global__ void reduceBornSum(float alpha, float beta, float gamma, c
* Reduce the Born force.
*/
extern
"C"
__global__
void
reduceBornForce
(
long
long
*
__restrict__
bornForce
,
real
*
__restrict__
energyBuffer
,
extern
"C"
__global__
void
reduceBornForce
(
long
long
*
__restrict__
bornForce
,
mixed
*
__restrict__
energyBuffer
,
const
float2
*
__restrict__
params
,
const
real
*
__restrict__
bornRadii
,
const
real
*
__restrict__
obcChain
)
{
real
energy
=
0
;
mixed
energy
=
0
;
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Get summed Born force
...
...
@@ -402,7 +402,7 @@ typedef struct {
*/
extern
"C"
__global__
void
computeGBSAForce1
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
global_bornForce
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
...
...
@@ -415,7 +415,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__shared__
AtomData2
localData
[
FORCE_WORK_GROUP_SIZE
];
// First loop: process tiles that contain exclusions.
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
fd473eea
...
...
@@ -100,7 +100,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
*
*/
extern
"C"
__global__
void
computeNonbonded
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
...
...
@@ -112,7 +112,7 @@ extern "C" __global__ void computeNonbonded(
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
real
energy
=
0
.0
f
;
mixed
energy
=
0
;
// used shared memory if the device cannot shuffle
#ifndef ENABLE_SHUFFLE
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
...
...
@@ -175,6 +175,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
0.5
f
*
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force
.
x
-=
delta
.
x
*
dEdR
;
force
.
y
-=
delta
.
y
*
dEdR
;
...
...
@@ -184,6 +185,7 @@ extern "C" __global__ void computeNonbonded(
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
...
...
@@ -241,6 +243,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -270,11 +273,12 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif // end USE_SYMMETRIC
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
#endif
// cycles the indices
// 0 1 2 3 4 5 6 7 -> 1 2 3 4 5 6 7 0
...
...
@@ -282,6 +286,7 @@ extern "C" __global__ void computeNonbonded(
}
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
shflForce
.
y
*
0x100000000
)));
...
...
@@ -290,13 +295,16 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
#endif
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
#endif
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
...
@@ -441,6 +449,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -472,6 +481,7 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -509,6 +519,7 @@ extern "C" __global__ void computeNonbonded(
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
...
...
@@ -540,12 +551,14 @@ extern "C" __global__ void computeNonbonded(
#endif // end USE_SYMMETRIC
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
#endif
#endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
}
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
...
...
@@ -565,8 +578,11 @@ extern "C" __global__ void computeNonbonded(
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
#endif
}
#endif
}
pos
++
;
}
#ifdef INCLUDE_ENERGY
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
#endif
}
\ No newline at end of file
platforms/cuda/src/kernels/pme.cu
View file @
fd473eea
...
...
@@ -115,7 +115,7 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern
"C"
__global__
void
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
mixed
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
...
...
@@ -150,14 +150,14 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
extern
"C"
__global__
void
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
mixed
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
real
energy
=
0
;
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
gridSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
// real indices
int
kx
=
index
/
(
GRID_SIZE_Y
*
(
GRID_SIZE_Z
));
...
...
platforms/cuda/staticTarget/CMakeLists.txt
View file @
fd473eea
...
...
@@ -19,7 +19,7 @@ SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
)
ELSE
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
)
ENDIF
(
APPLE
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
platforms/cuda/tests/CMakeLists.txt
View file @
fd473eea
...
...
@@ -28,7 +28,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
IF
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-F/Library/Frameworks -framework CUDA"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ELSE
(
APPLE
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
SET_TARGET_PROPERTIES
(
${
TEST_ROOT
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
"
)
ENDIF
(
APPLE
)
ADD_TEST
(
${
TEST_ROOT
}
Single
${
EXECUTABLE_OUTPUT_PATH
}
/
${
TEST_ROOT
}
single
)
...
...
platforms/cuda/tests/CudaTests.h
0 → 100644
View file @
fd473eea
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "CudaPlatform.h"
#include <string>
OpenMM
::
CudaPlatform
platform
;
void
initializeTests
(
int
argc
,
char
*
argv
[])
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"CudaPrecision"
,
std
::
string
(
argv
[
1
]));
}
platforms/cuda/tests/TestCudaAndersenThermostat.cpp
View file @
fd473eea
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 20
08-2012
Stanford University and the Authors. *
* Portions copyright (c) 20
15
Stanford University and the Authors.
*
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -29,191 +29,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of AndersenThermostat.
*/
#include "CudaTests.h"
#include "TestAndersenThermostat.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/AndersenThermostat.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
CudaPlatform
platform
;
void
testTemperature
()
{
const
int
numParticles
=
8
;
const
double
temp
=
100.0
;
const
double
collisionFreq
=
10.0
;
const
int
numSteps
=
5000
;
System
system
;
VerletIntegrator
integrator
(
0.003
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
2.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
}
system
.
addForce
(
forceField
);
AndersenThermostat
*
thermstat
=
new
AndersenThermostat
(
temp
,
collisionFreq
);
system
.
addForce
(
thermstat
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
positions
[
i
]
=
Vec3
((
i
%
2
==
0
?
2
:
-
2
),
(
i
%
4
<
2
?
2
:
-
2
),
(
i
<
4
?
2
:
-
2
));
context
.
setPositions
(
positions
);
context
.
setVelocitiesToTemperature
(
temp
);
// Let it equilibrate.
integrator
.
step
(
10000
);
// Now run it for a while and see if the temperature is correct.
double
ke
=
0.0
;
for
(
int
i
=
0
;
i
<
numSteps
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Energy
);
ke
+=
state
.
getKineticEnergy
();
integrator
.
step
(
10
);
}
ke
/=
numSteps
;
double
expected
=
0.5
*
numParticles
*
3
*
BOLTZ
*
temp
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
ke
,
0.1
);
}
void
testConstraints
()
{
const
int
numParticles
=
8
;
const
double
temp
=
100.0
;
const
double
collisionFreq
=
10.0
;
const
int
numSteps
=
15000
;
System
system
;
VerletIntegrator
integrator
(
0.004
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
2.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
}
system
.
addForce
(
forceField
);
system
.
addConstraint
(
0
,
1
,
1
);
system
.
addConstraint
(
1
,
2
,
1
);
system
.
addConstraint
(
2
,
3
,
1
);
system
.
addConstraint
(
3
,
0
,
1
);
system
.
addConstraint
(
4
,
5
,
1
);
system
.
addConstraint
(
5
,
6
,
1
);
system
.
addConstraint
(
6
,
7
,
1
);
system
.
addConstraint
(
7
,
4
,
1
);
AndersenThermostat
*
thermstat
=
new
AndersenThermostat
(
temp
,
collisionFreq
);
system
.
addForce
(
thermstat
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
positions
[
2
]
=
Vec3
(
1
,
1
,
0
);
positions
[
3
]
=
Vec3
(
0
,
1
,
0
);
positions
[
4
]
=
Vec3
(
1
,
0
,
1
);
positions
[
5
]
=
Vec3
(
1
,
1
,
1
);
positions
[
6
]
=
Vec3
(
0
,
1
,
1
);
positions
[
7
]
=
Vec3
(
0
,
0
,
1
);
context
.
setPositions
(
positions
);
context
.
setVelocitiesToTemperature
(
temp
);
// Let it equilibrate.
integrator
.
step
(
5000
);
// Now run it for a while and see if the temperature is correct.
double
ke
=
0.0
;
for
(
int
i
=
0
;
i
<
numSteps
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Energy
);
ke
+=
state
.
getKineticEnergy
();
integrator
.
step
(
1
);
}
ke
/=
numSteps
;
double
expected
=
0.5
*
(
numParticles
*
3
-
system
.
getNumConstraints
())
*
BOLTZ
*
temp
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
ke
,
0.1
);
}
void
testRandomSeed
()
{
const
int
numParticles
=
8
;
const
double
temp
=
100.0
;
const
double
collisionFreq
=
10.0
;
System
system
;
VerletIntegrator
integrator
(
0.01
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
2.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
}
system
.
addForce
(
forceField
);
AndersenThermostat
*
thermostat
=
new
AndersenThermostat
(
temp
,
collisionFreq
);
system
.
addForce
(
thermostat
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
((
i
%
2
==
0
?
2
:
-
2
),
(
i
%
4
<
2
?
2
:
-
2
),
(
i
<
4
?
2
:
-
2
));
velocities
[
i
]
=
Vec3
(
0
,
0
,
0
);
}
// Try twice with the same random seed.
thermostat
->
setRandomNumberSeed
(
5
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state1
=
context
.
getState
(
State
::
Positions
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state2
=
context
.
getState
(
State
::
Positions
);
// Try twice with a different random seed.
thermostat
->
setRandomNumberSeed
(
10
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state3
=
context
.
getState
(
State
::
Positions
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state4
=
context
.
getState
(
State
::
Positions
);
// Compare the results.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
ASSERT
(
state1
.
getPositions
()[
i
][
j
]
==
state2
.
getPositions
()[
i
][
j
]);
ASSERT
(
state3
.
getPositions
()[
i
][
j
]
==
state4
.
getPositions
()[
i
][
j
]);
ASSERT
(
state1
.
getPositions
()[
i
][
j
]
!=
state3
.
getPositions
()[
i
][
j
]);
}
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"CudaPrecision"
,
string
(
argv
[
1
]));
testTemperature
();
testConstraints
();
testRandomSeed
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
void
runPlatformTests
()
{
}
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
View file @
fd473eea
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 20
08-2012
Stanford University and the Authors. *
* Portions copyright (c) 20
15
Stanford University and the Authors.
*
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -29,252 +29,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/System.h"
#include "CudaTests.h"
#include "TestBrownianIntegrator.h"
/**
* This tests the CUDA implementation of BrownianIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/BrownianIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
CudaPlatform
platform
;
void
testSingleBond
()
{
System
system
;
system
.
addParticle
(
2.0
);
system
.
addParticle
(
2.0
);
double
dt
=
0.01
;
BrownianIntegrator
integrator
(
0
,
0.1
,
dt
);
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
forceField
->
addBond
(
0
,
1
,
1.5
,
1
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
context
.
setPositions
(
positions
);
// This is simply an overdamped harmonic oscillator, so compare it to the analytical solution.
double
rate
=
2
*
1.0
/
(
0.1
*
2.0
);
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
);
double
time
=
state
.
getTime
();
double
expectedDist
=
1.5
+
0.5
*
std
::
exp
(
-
rate
*
time
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
*
expectedDist
,
0
,
0
),
state
.
getPositions
()[
0
],
0.02
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
*
expectedDist
,
0
,
0
),
state
.
getPositions
()[
1
],
0.02
);
if
(
i
>
0
)
{
double
expectedSpeed
=
-
0.5
*
rate
*
std
::
exp
(
-
rate
*
(
time
-
0.5
*
dt
));
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
*
expectedSpeed
,
0
,
0
),
state
.
getVelocities
()[
0
],
0.11
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
*
expectedSpeed
,
0
,
0
),
state
.
getVelocities
()[
1
],
0.11
);
}
integrator
.
step
(
1
);
}
}
void
testTemperature
()
{
const
int
numParticles
=
8
;
const
int
numBonds
=
numParticles
-
1
;
const
double
temp
=
10.0
;
System
system
;
BrownianIntegrator
integrator
(
temp
,
2.0
,
0.01
);
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
system
.
addParticle
(
2.0
);
for
(
int
i
=
0
;
i
<
numBonds
;
++
i
)
forceField
->
addBond
(
i
,
i
+
1
,
1.0
,
5.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
positions
[
i
]
=
Vec3
(
i
,
0
,
0
);
context
.
setPositions
(
positions
);
// Let it equilibrate.
integrator
.
step
(
10000
);
// Now run it for a while and see if the temperature is correct.
double
pe
=
0.0
;
const
int
steps
=
50000
;
for
(
int
i
=
0
;
i
<
steps
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Energy
);
pe
+=
state
.
getPotentialEnergy
();
integrator
.
step
(
1
);
}
pe
/=
steps
;
double
expected
=
0.5
*
numBonds
*
BOLTZ
*
temp
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
pe
,
0.1
*
expected
);
}
void
testConstraints
()
{
const
int
numParticles
=
8
;
const
int
numConstraints
=
5
;
const
double
temp
=
20.0
;
System
system
;
BrownianIntegrator
integrator
(
temp
,
2.0
,
0.001
);
integrator
.
setConstraintTolerance
(
1e-5
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
10.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
0.2
:
-
0.2
),
0.5
,
5.0
);
}
system
.
addConstraint
(
0
,
1
,
1.0
);
system
.
addConstraint
(
1
,
2
,
1.0
);
system
.
addConstraint
(
2
,
3
,
1.0
);
system
.
addConstraint
(
4
,
5
,
1.0
);
system
.
addConstraint
(
6
,
7
,
1.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
(
i
,
0
,
0
);
velocities
[
i
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
}
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and see whether the constraints remain satisfied.
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Positions
);
for
(
int
j
=
0
;
j
<
numConstraints
;
++
j
)
{
int
particle1
,
particle2
;
double
distance
;
system
.
getConstraintParameters
(
j
,
particle1
,
particle2
,
distance
);
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
1e-4
);
}
integrator
.
step
(
1
);
}
}
void
testConstrainedMasslessParticles
()
{
System
system
;
system
.
addParticle
(
0.0
);
system
.
addParticle
(
1.0
);
system
.
addConstraint
(
0
,
1
,
1.5
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
BrownianIntegrator
integrator
(
300.0
,
2.0
,
0.01
);
bool
failed
=
false
;
try
{
// This should throw an exception.
Context
context
(
system
,
integrator
,
platform
);
}
catch
(
exception
&
ex
)
{
failed
=
true
;
}
ASSERT
(
failed
);
// Now make both particles massless, which should work.
system
.
setParticleMass
(
1
,
0.0
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setVelocitiesToTemperature
(
300.0
);
integrator
.
step
(
1
);
State
state
=
context
.
getState
(
State
::
Velocities
);
ASSERT_EQUAL
(
0.0
,
state
.
getVelocities
()[
0
][
0
]);
}
void
testRandomSeed
()
{
const
int
numParticles
=
8
;
const
double
temp
=
100.0
;
const
double
collisionFreq
=
10.0
;
System
system
;
BrownianIntegrator
integrator
(
temp
,
2.0
,
0.001
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
2.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
}
system
.
addForce
(
forceField
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
((
i
%
2
==
0
?
2
:
-
2
),
(
i
%
4
<
2
?
2
:
-
2
),
(
i
<
4
?
2
:
-
2
));
velocities
[
i
]
=
Vec3
(
0
,
0
,
0
);
}
// Try twice with the same random seed.
integrator
.
setRandomNumberSeed
(
5
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state1
=
context
.
getState
(
State
::
Positions
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state2
=
context
.
getState
(
State
::
Positions
);
// Try twice with a different random seed.
integrator
.
setRandomNumberSeed
(
10
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state3
=
context
.
getState
(
State
::
Positions
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
integrator
.
step
(
10
);
State
state4
=
context
.
getState
(
State
::
Positions
);
// Compare the results.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
ASSERT
(
state1
.
getPositions
()[
i
][
j
]
==
state2
.
getPositions
()[
i
][
j
]);
ASSERT
(
state3
.
getPositions
()[
i
][
j
]
==
state4
.
getPositions
()[
i
][
j
]);
ASSERT
(
state1
.
getPositions
()[
i
][
j
]
!=
state3
.
getPositions
()[
i
][
j
]);
}
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"CudaPrecision"
,
string
(
argv
[
1
]));
testSingleBond
();
testTemperature
();
testConstraints
();
testConstrainedMasslessParticles
();
testRandomSeed
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
void
runPlatformTests
()
{
}
platforms/cuda/tests/TestCudaCMAPTorsionForce.cpp
View file @
fd473eea
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c)
2010-
2015 Stanford University and the Authors. *
* Portions copyright (c) 2015 Stanford University and the Authors.
*
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -29,149 +29,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CMAPTorsionForce.
*/
#include "CudaTests.h"
#include "TestCMAPTorsionForce.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CMAPTorsionForce.h"
#include "openmm/PeriodicTorsionForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
CudaPlatform
platform
;
void
testCMAPTorsions
()
{
const
int
mapSize
=
36
;
// Create two systems: one with a pair of periodic torsions, and one with a CMAP torsion
// that approximates the same force.
System
system1
;
for
(
int
i
=
0
;
i
<
5
;
i
++
)
system1
.
addParticle
(
1.0
);
PeriodicTorsionForce
*
periodic
=
new
PeriodicTorsionForce
();
periodic
->
addTorsion
(
0
,
1
,
2
,
3
,
2
,
M_PI
/
4
,
1.5
);
periodic
->
addTorsion
(
1
,
2
,
3
,
4
,
3
,
M_PI
/
3
,
2.0
);
system1
.
addForce
(
periodic
);
System
system2
;
for
(
int
i
=
0
;
i
<
5
;
i
++
)
system2
.
addParticle
(
1.0
);
CMAPTorsionForce
*
cmap
=
new
CMAPTorsionForce
();
vector
<
double
>
mapEnergy
(
mapSize
*
mapSize
);
for
(
int
i
=
0
;
i
<
mapSize
;
i
++
)
{
double
angle1
=
i
*
2
*
M_PI
/
mapSize
;
double
energy1
=
1.5
*
(
1
+
cos
(
2
*
angle1
-
M_PI
/
4
));
for
(
int
j
=
0
;
j
<
mapSize
;
j
++
)
{
double
angle2
=
j
*
2
*
M_PI
/
mapSize
;
double
energy2
=
2.0
*
(
1
+
cos
(
3
*
angle2
-
M_PI
/
3
));
mapEnergy
[
i
+
j
*
mapSize
]
=
energy1
+
energy2
;
}
}
cmap
->
addMap
(
mapSize
,
mapEnergy
);
cmap
->
addTorsion
(
0
,
0
,
1
,
2
,
3
,
1
,
2
,
3
,
4
);
system2
.
addForce
(
cmap
);
// Set the atoms in various positions, and verify that both systems give equal forces and energy.
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
5
);
VerletIntegrator
integrator1
(
0.01
);
VerletIntegrator
integrator2
(
0.01
);
Context
c1
(
system1
,
integrator1
,
platform
);
Context
c2
(
system2
,
integrator2
,
platform
);
for
(
int
i
=
0
;
i
<
50
;
i
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
positions
.
size
();
j
++
)
positions
[
j
]
=
Vec3
(
5.0
*
genrand_real2
(
sfmt
),
5.0
*
genrand_real2
(
sfmt
),
5.0
*
genrand_real2
(
sfmt
));
c1
.
setPositions
(
positions
);
c2
.
setPositions
(
positions
);
State
s1
=
c1
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
s2
=
c2
.
getState
(
State
::
Forces
|
State
::
Energy
);
for
(
int
i
=
0
;
i
<
system1
.
getNumParticles
();
i
++
)
ASSERT_EQUAL_VEC
(
s1
.
getForces
()[
i
],
s2
.
getForces
()[
i
],
0.05
);
ASSERT_EQUAL_TOL
(
s1
.
getPotentialEnergy
(),
s2
.
getPotentialEnergy
(),
1e-3
);
}
}
void
testChangingParameters
()
{
// Create a system with two maps and one torsion.
const
int
mapSize
=
8
;
System
system
;
for
(
int
i
=
0
;
i
<
5
;
i
++
)
system
.
addParticle
(
1.0
);
CMAPTorsionForce
*
cmap
=
new
CMAPTorsionForce
();
vector
<
double
>
mapEnergy1
(
mapSize
*
mapSize
);
vector
<
double
>
mapEnergy2
(
mapSize
*
mapSize
);
for
(
int
i
=
0
;
i
<
mapSize
;
i
++
)
{
double
angle1
=
i
*
2
*
M_PI
/
mapSize
;
double
energy1
=
cos
(
angle1
);
for
(
int
j
=
0
;
j
<
mapSize
;
j
++
)
{
double
angle2
=
j
*
2
*
M_PI
/
mapSize
;
double
energy2
=
10
*
sin
(
angle2
);
mapEnergy1
[
i
+
j
*
mapSize
]
=
energy1
+
energy2
;
mapEnergy2
[
i
+
j
*
mapSize
]
=
energy1
-
energy2
;
}
}
cmap
->
addMap
(
mapSize
,
mapEnergy1
);
cmap
->
addMap
(
mapSize
,
mapEnergy2
);
cmap
->
addTorsion
(
0
,
0
,
1
,
2
,
3
,
1
,
2
,
3
,
4
);
system
.
addForce
(
cmap
);
// Set particle positions so angle1=0 and angle2=PI/4.
vector
<
Vec3
>
positions
(
5
);
positions
[
0
]
=
Vec3
(
0
,
0
,
1
);
positions
[
1
]
=
Vec3
(
0
,
0
,
0
);
positions
[
2
]
=
Vec3
(
1
,
0
,
0
);
positions
[
3
]
=
Vec3
(
1
,
0
,
1
);
positions
[
4
]
=
Vec3
(
0.5
,
-
0.5
,
1
);
VerletIntegrator
integrator
(
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
// Check that the energy is correct.
double
energy
=
context
.
getState
(
State
::
Energy
).
getPotentialEnergy
();
ASSERT_EQUAL_TOL
(
1
+
10
*
sin
(
M_PI
/
4
),
energy
,
1e-5
);
// Modify the parameters.
cmap
->
setTorsionParameters
(
0
,
1
,
0
,
1
,
2
,
3
,
1
,
2
,
3
,
4
);
for
(
int
i
=
0
;
i
<
mapSize
*
mapSize
;
i
++
)
mapEnergy2
[
i
]
*=
2.0
;
cmap
->
setMapParameters
(
1
,
mapSize
,
mapEnergy2
);
cmap
->
updateParametersInContext
(
context
);
// See if the results are correct.
energy
=
context
.
getState
(
State
::
Energy
).
getPotentialEnergy
();
ASSERT_EQUAL_TOL
(
2
-
20
*
sin
(
M_PI
/
4
),
energy
,
1e-5
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"CudaPrecision"
,
string
(
argv
[
1
]));
testCMAPTorsions
();
testChangingParameters
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
void
runPlatformTests
()
{
}
Prev
1
2
3
4
5
6
7
8
9
10
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment