Commit 1c938ceb authored by Jason Swails's avatar Jason Swails
Browse files

Merge branch 'master' into amber-switching

 Conflicts:
	wrappers/python/simtk/openmm/app/amberprmtopfile.py

In fixing the merge conflict, I went ahead and fixed up the switchDistance logic
to match what I did in CharmmPsfFile.
parents a1113e7b 167ae8a0
...@@ -111,6 +111,26 @@ void CustomNonbondedForceImpl::initialize(ContextImpl& context) { ...@@ -111,6 +111,26 @@ void CustomNonbondedForceImpl::initialize(ContextImpl& context) {
if (cutoff > 0.5*boxVectors[0][0] || cutoff > 0.5*boxVectors[1][1] || cutoff > 0.5*boxVectors[2][2]) if (cutoff > 0.5*boxVectors[0][0] || cutoff > 0.5*boxVectors[1][1] || cutoff > 0.5*boxVectors[2][2])
throw OpenMMException("CustomNonbondedForce: The cutoff distance cannot be greater than half the periodic box size."); throw OpenMMException("CustomNonbondedForce: The cutoff distance cannot be greater than half the periodic box size.");
} }
// Check that all interaction groups only specify particles that have been defined.
for (int group = 0; group < owner.getNumInteractionGroups(); group++) {
set<int> set1, set2;
owner.getInteractionGroupParameters(group, set1, set2);
for (set<int>::iterator it = set1.begin(); it != set1.end(); ++it)
if ((*it < 0) || (*it >= owner.getNumParticles())) {
stringstream msg;
msg << "CustomNonbondedForce: Interaction group " << group << " set1 contains a particle index (" << *it << ") "
<< "not present in system (" << owner.getNumParticles() << " particles).";
throw OpenMMException(msg.str());
}
for (set<int>::iterator it = set2.begin(); it != set2.end(); ++it)
if ((*it < 0) || (*it >= owner.getNumParticles())) {
stringstream msg;
msg << "CustomNonbondedForce: Interaction group " << group << " set2 contains a particle index (" << *it << ") "
<< "not present in system (" << owner.getNumParticles() << " particles).";
throw OpenMMException(msg.str());
}
}
kernel.getAs<CalcCustomNonbondedForceKernel>().initialize(context.getSystem(), owner); kernel.getAs<CalcCustomNonbondedForceKernel>().initialize(context.getSystem(), owner);
} }
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/GBVIForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
#include "openmm/kernels.h"
#include <vector>
#include <cmath>
#include <cstdio>
#include <sstream>
using namespace OpenMM;
using std::vector;
GBVIForceImpl::GBVIForceImpl(const GBVIForce& owner) : owner(owner) {
}
void GBVIForceImpl::initialize(ContextImpl& context) {
kernel = context.getPlatform().createKernel(CalcGBVIForceKernel::Name(), context);
if (owner.getNumParticles() != context.getSystem().getNumParticles())
throw OpenMMException("GBVIForce must have exactly as many particles as the System it belongs to.");
const System& system = context.getSystem();
int numberOfParticles = owner.getNumParticles();
int numberOfBonds = owner.getNumBonds();
// load 1-2 atom pairs along w/ bond distance using HarmonicBondForce & constraints
// numberOfBonds < 1, indicating they were not set by the user
if( numberOfBonds < 1 && numberOfParticles > 1 ){
(void) fprintf( stderr, "Warning: no covalent bonds set for GB/VI force!\n" );
// getBondsFromForces( context );
// numberOfBonds = owner.getNumBonds();
}
std::vector< std::vector<int> > bondIndices;
bondIndices.resize( numberOfBonds );
std::vector<double> bondLengths;
bondLengths.resize( numberOfBonds );
for (int i = 0; i < numberOfBonds; i++) {
int particle1, particle2;
double bondLength;
owner.getBondParameters(i, particle1, particle2, bondLength);
if (particle1 < 0 || particle1 >= owner.getNumParticles()) {
std::stringstream msg;
msg << "GBVISoftcoreForce: Illegal particle index: ";
msg << particle1;
throw OpenMMException(msg.str());
}
if (particle2 < 0 || particle2 >= owner.getNumParticles()) {
std::stringstream msg;
msg << "GBVISoftcoreForce: Illegal particle index: ";
msg << particle2;
throw OpenMMException(msg.str());
}
if (bondLength < 0 ) {
std::stringstream msg;
msg << "GBVISoftcoreForce: negative bondlength: ";
msg << bondLength;
throw OpenMMException(msg.str());
}
bondIndices[i].push_back( particle1 );
bondIndices[i].push_back( particle2 );
bondLengths[i] = bondLength;
}
if (owner.getNonbondedMethod() == GBVIForce::CutoffPeriodic) {
Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double cutoff = owner.getCutoffDistance();
if (cutoff > 0.5*boxVectors[0][0] || cutoff > 0.5*boxVectors[1][1] || cutoff > 0.5*boxVectors[2][2])
throw OpenMMException("GBVIForce: The cutoff distance cannot be greater than half the periodic box size.");
}
vector<double> scaledRadii;
scaledRadii.resize(numberOfParticles);
findScaledRadii( numberOfParticles, bondIndices, bondLengths, scaledRadii);
kernel.getAs<CalcGBVIForceKernel>().initialize(context.getSystem(), owner, scaledRadii);
}
/*
int GBVIForceImpl::getBondsFromForces(ContextImpl& context) {
// load 1-2 atom pairs along w/ bond distance using HarmonicBondForce & constraints
const System& system = context.getSystem();
for (int i = 0; i < system.getNumForces(); i++) {
if (dynamic_cast<const HarmonicBondForce*>(&system.getForce(i)) != NULL) {
const HarmonicBondForce& force = dynamic_cast<const HarmonicBondForce&>(system.getForce(i));
for (int j = 0; j < force.getNumBonds(); ++j) {
int particle1, particle2;
double length, k;
force.getBondParameters(j, particle1, particle2, length, k);
owner.addBond( particle1, particle2, length );
}
break;
}
}
// Also treat constrained distances as bonds if mass of one particle is < (2 + epsilon) (~2=deuterium)
for (int j = 0; j < system.getNumConstraints(); j++) {
int particle1, particle2;
double distance;
system.getConstraintParameters(j, particle1, particle2, distance);
double mass1 = system.getParticleMass( particle1 );
double mass2 = system.getParticleMass( particle2 );
if( mass1 < 2.1 || mass2 < 2.1 ){
owner.addBond( particle1, particle2, distance );
}
}
return 0;
}
*/
void GBVIForceImpl::findScaledRadii( int numberOfParticles, const std::vector<std::vector<int> >& bondIndices,
const std::vector<double> & bondLengths, std::vector<double> & scaledRadii) const {
// load 1-2 indicies for each atom
std::vector<std::vector<int> > bonded12(numberOfParticles);
for (int i = 0; i < (int) bondIndices.size(); ++i) {
bonded12[bondIndices[i][0]].push_back(i);
bonded12[bondIndices[i][1]].push_back(i);
}
int errors = 0;
// compute scaled radii (Eq. 5 of Labute paper [JCC 29 p. 1693-1698 2008])
for (int j = 0; j < (int) bonded12.size(); ++j){
double charge;
double gamma;
double radiusJ;
double scaledRadiusJ;
owner.getParticleParameters(j, charge, radiusJ, gamma);
if( bonded12[j].size() == 0 && numberOfParticles > 1 ){
(void) fprintf( stderr, "Warning GBVIForceImpl::findScaledRadii atom %d has no covalent bonds; using atomic radius=%.3f.\n", j, radiusJ );
scaledRadiusJ = radiusJ;
// errors++;
} else {
double rJ2 = radiusJ*radiusJ;
// loop over bonded neighbors of atom j, applying Eq. 5 in Labute
scaledRadiusJ = 0.0;
for (int i = 0; i < (int) bonded12[j].size(); ++i){
int index = bonded12[j][i];
int bondedAtomIndex = (j == bondIndices[index][0]) ? bondIndices[index][1] : bondIndices[index][0];
double radiusI;
owner.getParticleParameters(bondedAtomIndex, charge, radiusI, gamma);
double rI2 = radiusI*radiusI;
double a_ij = (radiusI - bondLengths[index]);
a_ij *= a_ij;
a_ij = (rJ2 - a_ij)/(2.0*bondLengths[index]);
double a_ji = radiusJ - bondLengths[index];
a_ji *= a_ji;
a_ji = (rI2 - a_ji)/(2.0*bondLengths[index]);
scaledRadiusJ += a_ij*a_ij*(3.0*radiusI - a_ij) + a_ji*a_ji*( 3.0*radiusJ - a_ji );
}
scaledRadiusJ = (radiusJ*radiusJ*radiusJ) - 0.125*scaledRadiusJ;
if( scaledRadiusJ > 0.0 ){
scaledRadiusJ = 0.95*pow( scaledRadiusJ, (1.0/3.0) );
} else {
scaledRadiusJ = 0.0;
}
}
scaledRadii[j] = scaledRadiusJ;
}
// abort if errors
if( errors ){
throw OpenMMException("GBVIForceImpl::findScaledRadii errors -- aborting");
}
}
double GBVIForceImpl::calcForcesAndEnergy(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1<<owner.getForceGroup())) != 0)
return kernel.getAs<CalcGBVIForceKernel>().execute(context, includeForces, includeEnergy);
return 0.0;
}
std::vector<std::string> GBVIForceImpl::getKernelNames() {
std::vector<std::string> names;
names.push_back(CalcGBVIForceKernel::Name());
return names;
}
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013 Stanford University and the Authors. * * Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -47,3 +47,19 @@ Integrator::~Integrator() { ...@@ -47,3 +47,19 @@ Integrator::~Integrator() {
context->integratorDeleted(); context->integratorDeleted();
} }
} }
double Integrator::getStepSize() const {
return stepSize;
}
void Integrator::setStepSize(double size) {
stepSize = size;
}
double Integrator::getConstraintTolerance() const {
return constraintTol;
}
void Integrator::setConstraintTolerance(double tol) {
constraintTol = tol;
}
...@@ -12,9 +12,11 @@ ...@@ -12,9 +12,11 @@
# libOpenMMCPU_static.a # libOpenMMCPU_static.a
#---------------------------------------------------- #----------------------------------------------------
IF(BUILD_TESTING) SET(OPENMM_BUILD_CPU_TESTS TRUE CACHE BOOL "Whether to build CPU platform test cases")
MARK_AS_ADVANCED(OPENMM_BUILD_CPU_TESTS)
IF(BUILD_TESTING AND OPENMM_BUILD_CPU_TESTS)
SUBDIRS(tests) SUBDIRS(tests)
ENDIF(BUILD_TESTING) ENDIF(BUILD_TESTING AND OPENMM_BUILD_CPU_TESTS)
# The source is organized into subdirectories, but we handle them all from # The source is organized into subdirectories, but we handle them all from
# this CMakeLists file rather than letting CMake visit them as SUBDIRS. # this CMakeLists file rather than letting CMake visit them as SUBDIRS.
......
...@@ -113,12 +113,10 @@ public: ...@@ -113,12 +113,10 @@ public:
} }
/** /**
* Find the index of the first particle in voxel (y,z) whose x coordinate in >= the specified value. * Find the index of the first particle in voxel (y,z) whose x coordinate is >= the specified value.
*/ */
int findLowerBound(int y, int z, double x) const { int findLowerBound(int y, int z, double x, int lower, int upper) const {
const vector<pair<float, int> >& bin = bins[y][z]; const vector<pair<float, int> >& bin = bins[y][z];
int lower = 0;
int upper = bin.size();
while (lower < upper) { while (lower < upper) {
int middle = (lower+upper)/2; int middle = (lower+upper)/2;
if (bin[middle].first < x) if (bin[middle].first < x)
...@@ -130,12 +128,10 @@ public: ...@@ -130,12 +128,10 @@ public:
} }
/** /**
* Find the index of the first particle in voxel (y,z) whose x coordinate in greater than the specified value. * Find the index of the first particle in voxel (y,z) whose x coordinate is greater than the specified value.
*/ */
int findUpperBound(int y, int z, double x) const { int findUpperBound(int y, int z, double x, int lower, int upper) const {
const vector<pair<float, int> >& bin = bins[y][z]; const vector<pair<float, int> >& bin = bins[y][z];
int lower = 0;
int upper = bin.size();
while (lower < upper) { while (lower < upper) {
int middle = (lower+upper)/2; int middle = (lower+upper)/2;
if (bin[middle].first > x) if (bin[middle].first > x)
...@@ -211,7 +207,7 @@ public: ...@@ -211,7 +207,7 @@ public:
// Loop over voxels along the y axis. // Loop over voxels along the y axis.
int boxz = (int) floor((float) z/nz); float boxz = floor((float) z/nz);
int starty = centerVoxelIndex.y-dIndexY; int starty = centerVoxelIndex.y-dIndexY;
int endy = centerVoxelIndex.y+dIndexY; int endy = centerVoxelIndex.y+dIndexY;
float yoffset = (float) (usePeriodic ? boxz*periodicBoxVectors[2][1] : 0); float yoffset = (float) (usePeriodic ? boxz*periodicBoxVectors[2][1] : 0);
...@@ -228,7 +224,7 @@ public: ...@@ -228,7 +224,7 @@ public:
voxelIndex.y = y; voxelIndex.y = y;
if (usePeriodic) if (usePeriodic)
voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y)); voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
int boxy = (int) floor((float) y/ny); float boxy = floor((float) y/ny);
float xoffset = (float) (usePeriodic ? boxy*periodicBoxVectors[1][0]+boxz*periodicBoxVectors[2][0] : 0); float xoffset = (float) (usePeriodic ? boxy*periodicBoxVectors[1][0]+boxz*periodicBoxVectors[2][0] : 0);
// Identify the range of atoms within this bin we need to search. When using periodic boundary // Identify the range of atoms within this bin we need to search. When using periodic boundary
...@@ -264,22 +260,25 @@ public: ...@@ -264,22 +260,25 @@ public:
int numRanges; int numRanges;
int rangeStart[2]; int rangeStart[2];
int rangeEnd[2]; int rangeEnd[2];
rangeStart[0] = findLowerBound(voxelIndex.y, voxelIndex.z, minx); int binSize = bins[voxelIndex.y][voxelIndex.z].size();
rangeStart[0] = findLowerBound(voxelIndex.y, voxelIndex.z, minx, 0, binSize);
if (needPeriodic) { if (needPeriodic) {
numRanges = 2; numRanges = 2;
rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx); rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
if (rangeStart[0] > 0) { if (rangeStart[0] > 0 && rangeEnd[0] < binSize)
numRanges = 1;
else if (rangeStart[0] > 0) {
rangeStart[1] = 0; rangeStart[1] = 0;
rangeEnd[1] = min(findUpperBound(voxelIndex.y, voxelIndex.z, maxx-periodicBoxSize[0]), rangeStart[0]); rangeEnd[1] = min(findUpperBound(voxelIndex.y, voxelIndex.z, maxx-periodicBoxSize[0], 0, rangeStart[0]), rangeStart[0]);
} }
else { else {
rangeStart[1] = max(findLowerBound(voxelIndex.y, voxelIndex.z, minx+periodicBoxSize[0]), rangeEnd[0]); rangeStart[1] = max(findLowerBound(voxelIndex.y, voxelIndex.z, minx+periodicBoxSize[0], rangeEnd[0], binSize), rangeEnd[0]);
rangeEnd[1] = bins[voxelIndex.y][voxelIndex.z].size(); rangeEnd[1] = bins[voxelIndex.y][voxelIndex.z].size();
} }
} }
else { else {
numRanges = 1; numRanges = 1;
rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx); rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
} }
bool periodicRectangular = (needPeriodic && !triclinic); bool periodicRectangular = (needPeriodic && !triclinic);
......
#ifndef OPENMM_GBVIFORCE_PROXY_H_
#define OPENMM_GBVIFORCE_PROXY_H_
/* -------------------------------------------------------------------------- * /* -------------------------------------------------------------------------- *
* OpenMM * * OpenMM *
* -------------------------------------------------------------------------- * * -------------------------------------------------------------------------- *
...@@ -9,7 +6,7 @@ ...@@ -9,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2010 Stanford University and the Authors. * * Portions copyright (c) 2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -32,22 +29,8 @@ ...@@ -32,22 +29,8 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. * * USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "openmm/internal/windowsExport.h" #include "CpuTests.h"
#include "openmm/serialization/SerializationProxy.h" #include "TestCompoundIntegrator.h"
namespace OpenMM {
/**
* This is a proxy for serializing GBVIForce objects.
*/
class OPENMM_EXPORT GBVIForceProxy : public SerializationProxy {
public:
GBVIForceProxy();
void serialize(const void* object, SerializationNode& node) const;
void* deserialize(const SerializationNode& node) const;
};
} // namespace OpenMM
#endif /*OPENMM_GBVIFORCE_PROXY_H_*/ void runPlatformTests() {
}
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <map> #include <map>
#include <queue> #include <queue>
#include <string> #include <string>
#include <utility>
#define __CL_ENABLE_EXCEPTIONS #define __CL_ENABLE_EXCEPTIONS
#ifdef _MSC_VER #ifdef _MSC_VER
// Prevent Windows from defining macros that interfere with other code. // Prevent Windows from defining macros that interfere with other code.
...@@ -538,6 +539,11 @@ public: ...@@ -538,6 +539,11 @@ public:
*/ */
void invalidateMolecules(); void invalidateMolecules();
private: private:
/**
* Compute a sorted list of device indices in decreasing order of desirability
*/
std::vector<int> getDevicePrecedence();
struct Molecule; struct Molecule;
struct MoleculeGroup; struct MoleculeGroup;
class VirtualSiteInfo; class VirtualSiteInfo;
......
...@@ -62,6 +62,14 @@ public: ...@@ -62,6 +62,14 @@ public:
CudaArray& getStepSize() { CudaArray& getStepSize() {
return *stepSize; return *stepSize;
} }
/**
* Set the size to use for the next step.
*/
void setNextStepSize(double size);
/**
* Get the size that was used for the last step.
*/
double getLastStepSize();
/** /**
* Apply constraints to the atom positions. * Apply constraints to the atom positions.
* *
...@@ -154,6 +162,7 @@ private: ...@@ -154,6 +162,7 @@ private:
CudaArray* vsiteLocalCoordsParams; CudaArray* vsiteLocalCoordsParams;
int randomPos; int randomPos;
int lastSeed, numVsites; int lastSeed, numVsites;
double2 lastStepSize;
struct ShakeCluster; struct ShakeCluster;
struct ConstraintOrderer; struct ConstraintOrderer;
}; };
......
...@@ -592,7 +592,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel { ...@@ -592,7 +592,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform), CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL), cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), fft(NULL), pmeio(NULL) { pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) {
} }
~CudaCalcNonbondedForceKernel(); ~CudaCalcNonbondedForceKernel();
/** /**
...@@ -657,6 +657,7 @@ private: ...@@ -657,6 +657,7 @@ private:
CudaArray* pmeBsplineModuliZ; CudaArray* pmeBsplineModuliZ;
CudaArray* pmeAtomRange; CudaArray* pmeAtomRange;
CudaArray* pmeAtomGridIndex; CudaArray* pmeAtomGridIndex;
CudaArray* pmeEnergyBuffer;
CudaSort* sort; CudaSort* sort;
Kernel cpuPme; Kernel cpuPme;
PmeIO* pmeio; PmeIO* pmeio;
...@@ -1123,7 +1124,6 @@ public: ...@@ -1123,7 +1124,6 @@ public:
double computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator); double computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator);
private: private:
CudaContext& cu; CudaContext& cu;
double prevStepSize;
CUfunction kernel1, kernel2; CUfunction kernel1, kernel2;
}; };
...@@ -1354,7 +1354,7 @@ private: ...@@ -1354,7 +1354,7 @@ private:
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step); bool evaluateCondition(int step);
CudaContext& cu; CudaContext& cu;
double prevStepSize, energy; double energy;
float energyFloat; float energyFloat;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints;
......
...@@ -121,7 +121,7 @@ public: ...@@ -121,7 +121,7 @@ public:
ContextImpl* context; ContextImpl* context;
std::vector<CudaContext*> contexts; std::vector<CudaContext*> contexts;
std::vector<double> contextEnergy; std::vector<double> contextEnergy;
bool removeCM, peerAccessSupported, useCpuPme; bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme;
int cmMotionFrequency; int cmMotionFrequency;
int stepCount, computeForceCount; int stepCount, computeForceCount;
double time; double time;
......
...@@ -122,49 +122,48 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -122,49 +122,48 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT(cuDeviceGetCount(&numDevices)); CHECK_RESULT(cuDeviceGetCount(&numDevices));
if (deviceIndex < -1 || deviceIndex >= numDevices) if (deviceIndex < -1 || deviceIndex >= numDevices)
throw OpenMMException("Illegal value for CudaDeviceIndex: "+intToString(deviceIndex)); throw OpenMMException("Illegal value for CudaDeviceIndex: "+intToString(deviceIndex));
vector<int> devicePrecedence;
if (deviceIndex == -1) { if (deviceIndex == -1) {
// Try to figure out which device is the fastest. devicePrecedence = getDevicePrecedence();
} else {
int bestSpeed = -1; devicePrecedence.push_back(deviceIndex);
int bestCompute = -1; }
for (int i = 0; i < numDevices; i++) {
CHECK_RESULT(cuDeviceGet(&device, i)); this->deviceIndex = -1;
int major, minor, clock, multiprocessors; for (int i = 0; i < static_cast<int>(devicePrecedence.size()); i++) {
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device)); int trialDeviceIndex = devicePrecedence[i];
if (major == 1 && minor < 2) CHECK_RESULT(cuDeviceGet(&device, trialDeviceIndex));
continue; // 1.0 and 1.1 are not supported defaultOptimizationOptions = "--use_fast_math";
CHECK_RESULT(cuDeviceGetAttribute(&clock, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device)); unsigned int flags = CU_CTX_MAP_HOST;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device)); if (useBlockingSync)
int speed = clock*multiprocessors; flags += CU_CTX_SCHED_BLOCKING_SYNC;
if (major > bestCompute || (major == bestCompute && speed > bestSpeed)) { else
deviceIndex = i; flags += CU_CTX_SCHED_SPIN;
bestSpeed = speed;
bestCompute = major; if (cuCtxCreate(&context, flags, device) == CUDA_SUCCESS) {
} this->deviceIndex = trialDeviceIndex;
break;
} }
} }
if (deviceIndex == -1) if (this->deviceIndex == -1)
throw OpenMMException("No compatible CUDA device is available"); if (deviceIndex != -1)
CHECK_RESULT(cuDeviceGet(&device, deviceIndex)); throw OpenMMException("The requested CUDA device could not be loaded");
this->deviceIndex = deviceIndex; else
throw OpenMMException("No compatible CUDA device is available");
int major, minor; int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device)); CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
// This is a workaround to support GTX 980 with CUDA 6.5. It reports its compute capability #if __CUDA_API_VERSION < 7000
// as 5.2, but the compiler doesn't support anything beyond 5.0. We can remove this once // This is a workaround to support GTX 980 with CUDA 6.5. It reports
// CUDA 7.0 is released. // its compute capability as 5.2, but the compiler doesn't support
if (major == 5) // anything beyond 5.0.
minor = 0; if (major == 5)
minor = 0;
#endif
gpuArchitecture = intToString(major)+intToString(minor); gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor; computeCapability = major+0.1*minor;
if ((useDoublePrecision || useMixedPrecision) && computeCapability < 1.3)
throw OpenMMException("This device does not support double precision");
defaultOptimizationOptions = "--use_fast_math";
unsigned int flags = CU_CTX_MAP_HOST;
if (useBlockingSync)
flags += CU_CTX_SCHED_BLOCKING_SYNC;
else
flags += CU_CTX_SCHED_SPIN;
CHECK_RESULT(cuCtxCreate(&context, flags, device));
contextIsValid = true; contextIsValid = true;
CHECK_RESULT(cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED)); CHECK_RESULT(cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED));
if (contextIndex > 0) { if (contextIndex > 0) {
...@@ -245,9 +244,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -245,9 +244,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["ATAN"] = useDoublePrecision ? "atan" : "atanf"; compilationDefines["ATAN"] = useDoublePrecision ? "atan" : "atanf";
compilationDefines["ERF"] = useDoublePrecision ? "erf" : "erff"; compilationDefines["ERF"] = useDoublePrecision ? "erf" : "erff";
compilationDefines["ERFC"] = useDoublePrecision ? "erfc" : "erfcf"; compilationDefines["ERFC"] = useDoublePrecision ? "erfc" : "erfcf";
// Set defines for applying periodic boundary conditions. // Set defines for applying periodic boundary conditions.
Vec3 boxVectors[3]; Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]); system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
boxIsTriclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 || boxIsTriclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
...@@ -307,11 +306,11 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -307,11 +306,11 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
} }
// Create the work thread used for parallelization when running on multiple devices. // Create the work thread used for parallelization when running on multiple devices.
thread = new WorkThread(); thread = new WorkThread();
// Create utilities objects. // Create utilities objects.
bonded = new CudaBondedUtilities(*this); bonded = new CudaBondedUtilities(*this);
nonbonded = new CudaNonbondedUtilities(*this); nonbonded = new CudaNonbondedUtilities(*this);
integration = new CudaIntegrationUtilities(*this, system); integration = new CudaIntegrationUtilities(*this, system);
...@@ -427,7 +426,7 @@ string CudaContext::replaceStrings(const string& input, const std::map<std::stri ...@@ -427,7 +426,7 @@ string CudaContext::replaceStrings(const string& input, const std::map<std::stri
if (index != result.npos) { if (index != result.npos) {
if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) && (index == result.size()-size || symbolChars.find(result[index+size]) == symbolChars.end())) { if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) && (index == result.size()-size || symbolChars.find(result[index+size]) == symbolChars.end())) {
// We have found a complete symbol, not part of a longer symbol. // We have found a complete symbol, not part of a longer symbol.
result.replace(index, size, iter->second); result.replace(index, size, iter->second);
index += iter->second.size(); index += iter->second.size();
} }
...@@ -462,11 +461,11 @@ static bool compileInWindows(const string &command) { ...@@ -462,11 +461,11 @@ static bool compileInWindows(const string &command) {
return -1; return -1;
} }
WaitForSingleObject(pi.hProcess, INFINITE); WaitForSingleObject(pi.hProcess, INFINITE);
DWORD exitCode = -1; DWORD exitCode = -1;
if(!GetExitCodeProcess(pi.hProcess, &exitCode)) { if(!GetExitCodeProcess(pi.hProcess, &exitCode)) {
throw(OpenMMException("Could not get nvcc.exe's exit code\n")); throw(OpenMMException("Could not get nvcc.exe's exit code\n"));
} else { } else {
if(exitCode == 0) if(exitCode == 0)
return 0; return 0;
else else
return -1; return -1;
...@@ -522,9 +521,9 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -522,9 +521,9 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
if (!defines.empty()) if (!defines.empty())
src << endl; src << endl;
src << source << endl; src << source << endl;
// See whether we already have PTX for this kernel cached. // See whether we already have PTX for this kernel cached.
CSHA1 sha1; CSHA1 sha1;
sha1.Update((const UINT_8*) src.str().c_str(), src.str().size()); sha1.Update((const UINT_8*) src.str().c_str(), src.str().size());
sha1.Final(); sha1.Final();
...@@ -539,9 +538,9 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -539,9 +538,9 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
CUmodule module; CUmodule module;
if (cuModuleLoad(&module, cacheFile.str().c_str()) == CUDA_SUCCESS) if (cuModuleLoad(&module, cacheFile.str().c_str()) == CUDA_SUCCESS)
return module; return module;
// Select names for the various temporary files. // Select names for the various temporary files.
stringstream tempFileName; stringstream tempFileName;
tempFileName << "openmmTempKernel" << this; // Include a pointer to this context as part of the filename to avoid collisions. tempFileName << "openmmTempKernel" << this; // Include a pointer to this context as part of the filename to avoid collisions.
#ifdef WIN32 #ifdef WIN32
...@@ -555,12 +554,12 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -555,12 +554,12 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
int res = 0; int res = 0;
// If the runtime compiler plugin is available, use it. // If the runtime compiler plugin is available, use it.
if (hasCompilerKernel) { if (hasCompilerKernel) {
string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+gpuArchitecture+" "+options, *this); string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+gpuArchitecture+" "+options, *this);
// If possible, write the PTX out to a temporary file so we can cache it for later use. // If possible, write the PTX out to a temporary file so we can cache it for later use.
bool wroteCache = false; bool wroteCache = false;
try { try {
ofstream out(outputFile.c_str()); ofstream out(outputFile.c_str());
...@@ -574,7 +573,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -574,7 +573,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
} }
if (!wroteCache) { if (!wroteCache) {
// An error occurred. Possibly we don't have permission to write to the temp directory. Just try to load the module directly. // An error occurred. Possibly we don't have permission to write to the temp directory. Just try to load the module directly.
CHECK_RESULT2(cuModuleLoadDataEx(&module, &ptx[0], 0, NULL, NULL), "Error loading CUDA module"); CHECK_RESULT2(cuModuleLoadDataEx(&module, &ptx[0], 0, NULL, NULL), "Error loading CUDA module");
return module; return module;
} }
...@@ -883,7 +882,7 @@ private: ...@@ -883,7 +882,7 @@ private:
void CudaContext::findMoleculeGroups() { void CudaContext::findMoleculeGroups() {
// The first time this is called, we need to identify all the molecules in the system. // The first time this is called, we need to identify all the molecules in the system.
if (moleculeGroups.size() == 0) { if (moleculeGroups.size() == 0) {
// Add a ForceInfo that makes sure reordering doesn't break virtual sites. // Add a ForceInfo that makes sure reordering doesn't break virtual sites.
...@@ -966,7 +965,7 @@ void CudaContext::findMoleculeGroups() { ...@@ -966,7 +965,7 @@ void CudaContext::findMoleculeGroups() {
if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i])) if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i]))
identical = false; identical = false;
} }
// See if the constraints are identical. // See if the constraints are identical.
for (int i = 0; i < (int) mol.constraints.size() && identical; i++) { for (int i = 0; i < (int) mol.constraints.size() && identical; i++) {
...@@ -1047,11 +1046,11 @@ void CudaContext::invalidateMolecules() { ...@@ -1047,11 +1046,11 @@ void CudaContext::invalidateMolecules() {
} }
if (valid) if (valid)
return; return;
// The list of which molecules are identical is no longer valid. We need to restore the // The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them // atoms to their original order, rebuild the list of identical molecules, and sort them
// again. // again.
vector<int4> newCellOffsets(numAtoms); vector<int4> newCellOffsets(numAtoms);
if (useDoublePrecision) { if (useDoublePrecision) {
vector<double4> oldPosq(paddedNumAtoms); vector<double4> oldPosq(paddedNumAtoms);
...@@ -1196,6 +1195,8 @@ void CudaContext::reorderAtomsImpl() { ...@@ -1196,6 +1195,8 @@ void CudaContext::reorderAtomsImpl() {
molPos[i].x *= invNumAtoms; molPos[i].x *= invNumAtoms;
molPos[i].y *= invNumAtoms; molPos[i].y *= invNumAtoms;
molPos[i].z *= invNumAtoms; molPos[i].z *= invNumAtoms;
if (molPos[i].x != molPos[i].x)
throw OpenMMException("Particle coordinate is nan");
} }
if (nonbonded->getUsePeriodic()) { if (nonbonded->getUsePeriodic()) {
// Move each molecule position into the same box. // Move each molecule position into the same box.
...@@ -1391,3 +1392,41 @@ void CudaContext::WorkThread::flush() { ...@@ -1391,3 +1392,41 @@ void CudaContext::WorkThread::flush() {
pthread_cond_wait(&queueEmptyCondition, &queueLock); pthread_cond_wait(&queueEmptyCondition, &queueLock);
pthread_mutex_unlock(&queueLock); pthread_mutex_unlock(&queueLock);
} }
vector<int> CudaContext::getDevicePrecedence() {
int numDevices;
CUdevice thisDevice;
string errorMessage = "Error initializing Context";
vector<pair<pair<int, int>, int> > devices;
CHECK_RESULT(cuDeviceGetCount(&numDevices));
for (int i = 0; i < numDevices; i++) {
CHECK_RESULT(cuDeviceGet(&thisDevice, i));
int major, minor, clock, multiprocessors, speed;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, thisDevice));
if (major == 1 && minor < 2)
continue;
if ((useDoublePrecision || useMixedPrecision) && (major+0.1*minor < 1.3))
continue;
CHECK_RESULT(cuDeviceGetAttribute(&clock, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, thisDevice));
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, thisDevice));
speed = clock*multiprocessors;
pair<int, int> deviceProperties = std::make_pair(major, speed);
devices.push_back(std::make_pair(deviceProperties, -i));
}
// sort first by compute capability (higher is better), then speed
// (higher is better), and finally device index (lower is better)
std::sort(devices.begin(), devices.end());
std::reverse(devices.begin(), devices.end());
vector<int> precedence;
for (int i = 0; i < static_cast<int>(devices.size()); i++) {
precedence.push_back(-devices[i].second);
}
return precedence;
}
...@@ -109,7 +109,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -109,7 +109,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
} }
out << ");\n"; out << ");\n";
out << "APPLY_PERIODIC_TO_DELTA(periodicDistance_delta)\n"; out << "APPLY_PERIODIC_TO_DELTA(periodicDistance_delta)\n";
out << tempType << " periodicDistance_rinv = RSQRT(periodicDistance_delta.x*periodicDistance_delta.x + periodicDistance_delta.y*periodicDistance_delta.y + periodicDistance_delta.z*periodicDistance_delta.z);\n"; out << tempType << " periodicDistance_r2 = periodicDistance_delta.x*periodicDistance_delta.x + periodicDistance_delta.y*periodicDistance_delta.y + periodicDistance_delta.z*periodicDistance_delta.z;\n";
out << tempType << " periodicDistance_rinv = RSQRT(periodicDistance_r2);\n";
for (int j = 0; j < nodes.size(); j++) { for (int j = 0; j < nodes.size(); j++) {
const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder(); const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
int argIndex = -1; int argIndex = -1;
...@@ -123,17 +124,17 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -123,17 +124,17 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
if (argIndex == -1) if (argIndex == -1)
out << nodeNames[j] << " = RECIP(periodicDistance_rinv);\n"; out << nodeNames[j] << " = RECIP(periodicDistance_rinv);\n";
else if (argIndex == 0) else if (argIndex == 0)
out << nodeNames[j] << " = periodicDistance_delta.x*periodicDistance_rinv;\n"; out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.x*periodicDistance_rinv : 0);\n";
else if (argIndex == 1) else if (argIndex == 1)
out << nodeNames[j] << " = periodicDistance_delta.y*periodicDistance_rinv;\n"; out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.y*periodicDistance_rinv : 0);\n";
else if (argIndex == 2) else if (argIndex == 2)
out << nodeNames[j] << " = periodicDistance_delta.z*periodicDistance_rinv;\n"; out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.z*periodicDistance_rinv : 0);\n";
else if (argIndex == 3) else if (argIndex == 3)
out << nodeNames[j] << " = -periodicDistance_delta.x*periodicDistance_rinv;\n"; out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.x*periodicDistance_rinv : 0);\n";
else if (argIndex == 4) else if (argIndex == 4)
out << nodeNames[j] << " = -periodicDistance_delta.y*periodicDistance_rinv;\n"; out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.y*periodicDistance_rinv : 0);\n";
else if (argIndex == 5) else if (argIndex == 5)
out << nodeNames[j] << " = -periodicDistance_delta.z*periodicDistance_rinv;\n"; out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.z*periodicDistance_rinv : 0);\n";
} }
} }
else { else {
......
...@@ -106,21 +106,21 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S ...@@ -106,21 +106,21 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsAtoms(NULL), vsiteLocalCoordsParams(NULL) { vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), vsiteLocalCoordsAtoms(NULL), vsiteLocalCoordsParams(NULL) {
// Create workspace arrays. // Create workspace arrays.
lastStepSize = make_double2(0.0, 0.0);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) { if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
posDelta = CudaArray::create<double4>(context, context.getPaddedNumAtoms(), "posDelta"); posDelta = CudaArray::create<double4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<double4> deltas(posDelta->getSize(), make_double4(0.0, 0.0, 0.0, 0.0)); vector<double4> deltas(posDelta->getSize(), make_double4(0.0, 0.0, 0.0, 0.0));
posDelta->upload(deltas); posDelta->upload(deltas);
stepSize = CudaArray::create<double2>(context, 1, "stepSize"); stepSize = CudaArray::create<double2>(context, 1, "stepSize");
vector<double2> step(1, make_double2(0.0, 0.0)); stepSize->upload(&lastStepSize);
stepSize->upload(step);
} }
else { else {
posDelta = CudaArray::create<float4>(context, context.getPaddedNumAtoms(), "posDelta"); posDelta = CudaArray::create<float4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<float4> deltas(posDelta->getSize(), make_float4(0.0f, 0.0f, 0.0f, 0.0f)); vector<float4> deltas(posDelta->getSize(), make_float4(0.0f, 0.0f, 0.0f, 0.0f));
posDelta->upload(deltas); posDelta->upload(deltas);
stepSize = CudaArray::create<float2>(context, 1, "stepSize"); stepSize = CudaArray::create<float2>(context, 1, "stepSize");
vector<float2> step(1, make_float2(0.0f, 0.0f)); float2 lastStepSizeFloat = make_float2(0.0f, 0.0f);
stepSize->upload(step); stepSize->upload(&lastStepSizeFloat);
} }
// Record the set of constraints and how many constraints each atom is involved in. // Record the set of constraints and how many constraints each atom is involved in.
...@@ -650,6 +650,29 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() { ...@@ -650,6 +650,29 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete vsiteLocalCoordsParams; delete vsiteLocalCoordsParams;
} }
void CudaIntegrationUtilities::setNextStepSize(double size) {
if (size != lastStepSize.x || size != lastStepSize.y) {
lastStepSize = make_double2(size, size);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
stepSize->upload(&lastStepSize);
else {
float2 lastStepSizeFloat = make_float2((float) size, (float) size);
stepSize->upload(&lastStepSizeFloat);
}
}
}
double CudaIntegrationUtilities::getLastStepSize() {
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
stepSize->download(&lastStepSize);
else {
float2 lastStepSizeFloat;
stepSize->download(&lastStepSizeFloat);
lastStepSize = make_double2(lastStepSizeFloat.x, lastStepSizeFloat.y);
}
return lastStepSize.y;
}
void CudaIntegrationUtilities::applyConstraints(double tol) { void CudaIntegrationUtilities::applyConstraints(double tol) {
applyConstraints(false, tol); applyConstraints(false, tol);
} }
......
...@@ -1458,16 +1458,24 @@ private: ...@@ -1458,16 +1458,24 @@ private:
class CudaCalcNonbondedForceKernel::SyncStreamPostComputation : public CudaContext::ForcePostComputation { class CudaCalcNonbondedForceKernel::SyncStreamPostComputation : public CudaContext::ForcePostComputation {
public: public:
SyncStreamPostComputation(CudaContext& cu, CUevent event, int forceGroup) : cu(cu), event(event), forceGroup(forceGroup) { SyncStreamPostComputation(CudaContext& cu, CUevent event, CUfunction addEnergyKernel, CudaArray& pmeEnergyBuffer, int forceGroup) : cu(cu), event(event),
addEnergyKernel(addEnergyKernel), pmeEnergyBuffer(pmeEnergyBuffer), forceGroup(forceGroup) {
} }
double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) { double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1<<forceGroup)) != 0) if ((groups&(1<<forceGroup)) != 0)
cuStreamWaitEvent(cu.getCurrentStream(), event, 0); cuStreamWaitEvent(cu.getCurrentStream(), event, 0);
if (includeEnergy) {
int bufferSize = pmeEnergyBuffer.getSize();
void* args[] = {&pmeEnergyBuffer.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &bufferSize};
cu.executeKernel(addEnergyKernel, args, bufferSize);
}
return 0.0; return 0.0;
} }
private: private:
CudaContext& cu; CudaContext& cu;
CUevent event; CUevent event;
CUfunction addEnergyKernel;
CudaArray& pmeEnergyBuffer;
int forceGroup; int forceGroup;
}; };
...@@ -1493,6 +1501,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() { ...@@ -1493,6 +1501,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete pmeAtomRange; delete pmeAtomRange;
if (pmeAtomGridIndex != NULL) if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex; delete pmeAtomGridIndex;
if (pmeEnergyBuffer != NULL)
delete pmeEnergyBuffer;
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -1681,6 +1691,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1681,6 +1691,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ"); pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange"); pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex"); pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
pmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
cu.clearBuffer(*pmeEnergyBuffer);
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms()); sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
int cufftVersion; int cufftVersion;
cufftGetVersion(&cufftVersion); cufftGetVersion(&cufftVersion);
...@@ -1714,7 +1727,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1714,7 +1727,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (recipForceGroup < 0) if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup(); recipForceGroup = force.getForceGroup();
cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup)); cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, recipForceGroup)); cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
} }
hasInitializedFFT = true; hasInitializedFFT = true;
...@@ -1889,7 +1902,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1889,7 +1902,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
} }
if (includeEnergy) { if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &pmeEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(), &pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]}; cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, cu.getNumAtoms()); cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, cu.getNumAtoms());
...@@ -5674,7 +5687,6 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle ...@@ -5674,7 +5687,6 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle
CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, ""); CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, "");
kernel1 = cu.getKernel(module, "integrateVerletPart1"); kernel1 = cu.getKernel(module, "integrateVerletPart1");
kernel2 = cu.getKernel(module, "integrateVerletPart2"); kernel2 = cu.getKernel(module, "integrateVerletPart2");
prevStepSize = -1.0;
} }
void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) { void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
...@@ -5683,19 +5695,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn ...@@ -5683,19 +5695,7 @@ void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIn
int numAtoms = cu.getNumAtoms(); int numAtoms = cu.getNumAtoms();
int paddedNumAtoms = cu.getPaddedNumAtoms(); int paddedNumAtoms = cu.getPaddedNumAtoms();
double dt = integrator.getStepSize(); double dt = integrator.getStepSize();
if (dt != prevStepSize) { cu.getIntegrationUtilities().setNextStepSize(dt);
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
vector<double2> stepSizeVec(1);
stepSizeVec[0] = make_double2(dt, dt);
cu.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
}
else {
vector<float2> stepSizeVec(1);
stepSizeVec[0] = make_float2((float) dt, (float) dt);
cu.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
}
prevStepSize = dt;
}
// Call the first integration kernel. // Call the first integration kernel.
...@@ -5752,6 +5752,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev ...@@ -5752,6 +5752,7 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
double temperature = integrator.getTemperature(); double temperature = integrator.getTemperature();
double friction = integrator.getFriction(); double friction = integrator.getFriction();
double stepSize = integrator.getStepSize(); double stepSize = integrator.getStepSize();
cu.getIntegrationUtilities().setNextStepSize(stepSize);
if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) { if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
// Calculate the integration parameters. // Calculate the integration parameters.
...@@ -5766,8 +5767,6 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev ...@@ -5766,8 +5767,6 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
p[1] = fscale; p[1] = fscale;
p[2] = noisescale; p[2] = noisescale;
params->upload(p); params->upload(p);
double2 ss = make_double2(0, stepSize);
integration.getStepSize().upload(&ss);
} }
else { else {
vector<float> p(params->getSize()); vector<float> p(params->getSize());
...@@ -5775,8 +5774,6 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev ...@@ -5775,8 +5774,6 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
p[1] = (float) fscale; p[1] = (float) fscale;
p[2] = (float) noisescale; p[2] = (float) noisescale;
params->upload(p); params->upload(p);
float2 ss = make_float2(0, (float) stepSize);
integration.getStepSize().upload(&ss);
} }
prevTemp = temperature; prevTemp = temperature;
prevFriction = friction; prevFriction = friction;
...@@ -5929,20 +5926,13 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons ...@@ -5929,20 +5926,13 @@ double CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
// Update the time and step count. // Update the time and step count.
double dt, time; double dt = cu.getIntegrationUtilities().getLastStepSize();
double time = cu.getTime()+dt;
if (useDouble) { if (useDouble) {
double2 stepSize;
cu.getIntegrationUtilities().getStepSize().download(&stepSize);
dt = stepSize.y;
time = cu.getTime()+dt;
if (dt == maxStepSize) if (dt == maxStepSize)
time = maxTime; // Avoid round-off error time = maxTime; // Avoid round-off error
} }
else { else {
float2 stepSize;
cu.getIntegrationUtilities().getStepSize().download(&stepSize);
dt = stepSize.y;
time = cu.getTime()+dt;
if (dt == maxStepSizeFloat) if (dt == maxStepSizeFloat)
time = maxTime; // Avoid round-off error time = maxTime; // Avoid round-off error
} }
...@@ -6023,20 +6013,13 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co ...@@ -6023,20 +6013,13 @@ double CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
// Update the time and step count. // Update the time and step count.
double dt, time; double dt = cu.getIntegrationUtilities().getLastStepSize();
double time = cu.getTime()+dt;
if (useDouble) { if (useDouble) {
double2 stepSize;
cu.getIntegrationUtilities().getStepSize().download(&stepSize);
dt = stepSize.y;
time = cu.getTime()+dt;
if (dt == maxStepSize) if (dt == maxStepSize)
time = maxTime; // Avoid round-off error time = maxTime; // Avoid round-off error
} }
else { else {
float2 stepSize;
cu.getIntegrationUtilities().getStepSize().download(&stepSize);
dt = stepSize.y;
time = cu.getTime()+dt;
if (dt == maxStepSizeFloat) if (dt == maxStepSizeFloat)
time = maxTime; // Avoid round-off error time = maxTime; // Avoid round-off error
} }
...@@ -6139,7 +6122,6 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo ...@@ -6139,7 +6122,6 @@ void CudaIntegrateCustomStepKernel::initialize(const System& system, const Custo
summedValue = new CudaArray(cu, 1, elementSize, "summedValue"); summedValue = new CudaArray(cu, 1, elementSize, "summedValue");
perDofValues = new CudaParameterSet(cu, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cu.getUseDoublePrecision() || cu.getUseMixedPrecision()); perDofValues = new CudaParameterSet(cu, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables", false, cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
cu.addReorderListener(new ReorderListener(cu, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent)); cu.addReorderListener(new ReorderListener(cu, *perDofValues, localPerDofValuesFloat, localPerDofValuesDouble, deviceValuesAreCurrent));
prevStepSize = -1.0;
SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed()); SimTKOpenMMUtilities::setRandomNumberSeed(integrator.getRandomNumberSeed());
} }
...@@ -6553,9 +6535,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context, ...@@ -6553,9 +6535,7 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
} }
localValuesAreCurrent = false; localValuesAreCurrent = false;
double stepSize = integrator.getStepSize(); double stepSize = integrator.getStepSize();
if (stepSize != prevStepSize) { recordGlobalValue(stepSize, GlobalTarget(DT, dtVariableIndex));
recordGlobalValue(stepSize, GlobalTarget(DT, dtVariableIndex));
}
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
double value = context.getParameter(parameterNames[i]); double value = context.getParameter(parameterNames[i]);
if (value != globalValuesDouble[parameterVariableIndex[i]]) { if (value != globalValuesDouble[parameterVariableIndex[i]]) {
...@@ -6760,17 +6740,10 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context, ...@@ -6760,17 +6740,10 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
void CudaIntegrateCustomStepKernel::recordGlobalValue(double value, GlobalTarget target) { void CudaIntegrateCustomStepKernel::recordGlobalValue(double value, GlobalTarget target) {
switch (target.type) { switch (target.type) {
case DT: case DT:
if (value != globalValuesDouble[dtVariableIndex])
deviceGlobalsAreCurrent = false;
globalValuesDouble[dtVariableIndex] = value; globalValuesDouble[dtVariableIndex] = value;
deviceGlobalsAreCurrent = false; cu.getIntegrationUtilities().setNextStepSize(value);
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double size[] = {0, value};
cu.getIntegrationUtilities().getStepSize().upload(size);
}
else {
float size[] = {0, (float) value};
cu.getIntegrationUtilities().getStepSize().upload(size);
}
prevStepSize = value;
break; break;
case VARIABLE: case VARIABLE:
case PARAMETER: case PARAMETER:
......
...@@ -179,7 +179,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const { ...@@ -179,7 +179,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
} }
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty, CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0) { const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty) : context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false) {
bool blocking = (blockingProperty == "true"); bool blocking = (blockingProperty == "true");
vector<string> devices; vector<string> devices;
size_t searchPos = 0, nextPos; size_t searchPos = 0, nextPos;
...@@ -247,8 +247,11 @@ CudaPlatform::PlatformData::~PlatformData() { ...@@ -247,8 +247,11 @@ CudaPlatform::PlatformData::~PlatformData() {
} }
void CudaPlatform::PlatformData::initializeContexts(const System& system) { void CudaPlatform::PlatformData::initializeContexts(const System& system) {
if (hasInitializedContexts)
return;
for (int i = 0; i < (int) contexts.size(); i++) for (int i = 0; i < (int) contexts.size(); i++)
contexts[i]->initialize(); contexts[i]->initialize();
hasInitializedContexts = true;
} }
void CudaPlatform::PlatformData::syncContexts() { void CudaPlatform::PlatformData::syncContexts() {
......
...@@ -188,7 +188,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__ ...@@ -188,7 +188,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
energy += eterm*(grid.x*grid.x + grid.y*grid.y); energy += eterm*(grid.x*grid.x + grid.y*grid.y);
} }
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = 0.5f*energy;
} }
extern "C" __global__ extern "C" __global__
...@@ -286,3 +286,9 @@ void addForces(const real4* __restrict__ forces, unsigned long long* __restrict_ ...@@ -286,3 +286,9 @@ void addForces(const real4* __restrict__ forces, unsigned long long* __restrict_
forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.z*0x100000000)); forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.z*0x100000000));
} }
} }
extern "C" __global__
void addEnergy(const mixed* __restrict__ pmeEnergyBuffer, mixed* __restrict__ energyBuffer, int bufferSize) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < bufferSize; i += blockDim.x*gridDim.x)
energyBuffer[i] += pmeEnergyBuffer[i];
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestCompoundIntegrator.h"
void runPlatformTests() {
}
...@@ -62,6 +62,14 @@ public: ...@@ -62,6 +62,14 @@ public:
OpenCLArray& getStepSize() { OpenCLArray& getStepSize() {
return *stepSize; return *stepSize;
} }
/**
* Set the size to use for the next step.
*/
void setNextStepSize(double size);
/**
* Get the size that was used for the last step.
*/
double getLastStepSize();
/** /**
* Apply constraints to the atom positions. * Apply constraints to the atom positions.
* *
...@@ -153,6 +161,7 @@ private: ...@@ -153,6 +161,7 @@ private:
int randomPos; int randomPos;
int lastSeed, numVsites; int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites; bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels, ccmaUseDirectBuffer, hasOverlappingVsites;
mm_double2 lastStepSize;
struct ShakeCluster; struct ShakeCluster;
struct ConstraintOrderer; struct ConstraintOrderer;
}; };
......
...@@ -570,7 +570,7 @@ public: ...@@ -570,7 +570,7 @@ public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcNonbondedForceKernel(name, platform), OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcNonbondedForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL), hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL),
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL),
pmeAtomRange(NULL), pmeAtomGridIndex(NULL), sort(NULL), fft(NULL), pmeio(NULL) { pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) {
} }
~OpenCLCalcNonbondedForceKernel(); ~OpenCLCalcNonbondedForceKernel();
/** /**
...@@ -636,12 +636,14 @@ private: ...@@ -636,12 +636,14 @@ private:
OpenCLArray* pmeBsplineTheta; OpenCLArray* pmeBsplineTheta;
OpenCLArray* pmeAtomRange; OpenCLArray* pmeAtomRange;
OpenCLArray* pmeAtomGridIndex; OpenCLArray* pmeAtomGridIndex;
OpenCLArray* pmeEnergyBuffer;
OpenCLSort* sort; OpenCLSort* sort;
cl::CommandQueue pmeQueue; cl::CommandQueue pmeQueue;
cl::Event pmeSyncEvent; cl::Event pmeSyncEvent;
OpenCLFFT3D* fft; OpenCLFFT3D* fft;
Kernel cpuPme; Kernel cpuPme;
PmeIO* pmeio; PmeIO* pmeio;
SyncQueuePostComputation* syncQueue;
cl::Kernel ewaldSumsKernel; cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel; cl::Kernel ewaldForcesKernel;
cl::Kernel pmeGridIndexKernel; cl::Kernel pmeGridIndexKernel;
...@@ -1103,7 +1105,6 @@ public: ...@@ -1103,7 +1105,6 @@ public:
double computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator); double computeKineticEnergy(ContextImpl& context, const VerletIntegrator& integrator);
private: private:
OpenCLContext& cl; OpenCLContext& cl;
double prevStepSize;
bool hasInitializedKernels; bool hasInitializedKernels;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
...@@ -1342,7 +1343,7 @@ private: ...@@ -1342,7 +1343,7 @@ private:
void recordChangedParameters(ContextImpl& context); void recordChangedParameters(ContextImpl& context);
bool evaluateCondition(int step); bool evaluateCondition(int step);
OpenCLContext& cl; OpenCLContext& cl;
double prevStepSize, energy; double energy;
float energyFloat; float energyFloat;
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints; bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints;
......
...@@ -106,7 +106,7 @@ public: ...@@ -106,7 +106,7 @@ public:
ContextImpl* context; ContextImpl* context;
std::vector<OpenCLContext*> contexts; std::vector<OpenCLContext*> contexts;
std::vector<double> contextEnergy; std::vector<double> contextEnergy;
bool removeCM, useCpuPme; bool hasInitializedContexts, removeCM, useCpuPme;
int cmMotionFrequency; int cmMotionFrequency;
int stepCount, computeForceCount; int stepCount, computeForceCount;
double time; double time;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment