Unverified Commit edbc8407 authored by peastman's avatar peastman Committed by GitHub
Browse files

Common compute framework to unify CUDA and OpenCL code (#2488)

* Began creating common compute framework to unify code between CUDA and OpenCL

* Began OpenCL implementation of common compute framework

* Common implementation of CMMotionRemover

* CUDA implementation of common compute interface

* Converted HarmonicBondForce to common compute API

* Converted standard bonded forces to common compute API

* Converted ExpressionUtilities to common compute API

* Created ComputeParameterSet

* Converted custom bonded forces to common compute API

* Converted CustomCentroidBondForce to common compute API

* Converted CustomManyParticleForce to common compute API

* Moved lots of duplicate code from CudaContext and OpenCLContext to ComputeContext

* Converted GayBerneForce to common compute API

* Removed obsolete kernels

* Converted verlet integrators to common compute API

* Converted Langevin and Brownian integrators to common compute API

* Converted CustomIntegrator to common compute API

* Converted CustomNonbondedForce to common compute API

* Removed uses of a deprecated API

* Fixed failing test cases

* Converted GBSAOBCForce to common compute API

* Began converting CustomGBForce to common compute API

* Finished converting CustomGBForce to common compute API

* Merged duplicated code in CudaIntegrationUtilities and OpenCLIntegrationUtilities

* Converted RMSDForce and AndersenThermostat to common compute API

* Converted CustomHbondForce to common compute API

* Merged scripts for encoding kernel sources

* Converted Drude plugin to common compute API

* Fixed errors in CMake scripts

* Attempt at fixing errors on Windows

* Added discussion of common compute API to developer guide

* Added Windows export macro for common classes

* Fixed error in CMMotionRemover

* Updated travis to newer Ubuntu version

* Fixed errors on CPU OpenCL

* Fixed Windows linking errors

* Added missing pragma for 32 bit atomics

* Replaced long long with mm_long

* More fixes to Windows linking

* Bug fix
parent 38beeefe
#ifndef OPENMM_NONBONDEDUTILITIES_H_
#define OPENMM_NONBONDEDUTILITIES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/ArrayInterface.h"
#include "openmm/common/ComputeParameterInfo.h"
#include <string>
#include <vector>
namespace OpenMM {
/**
* This class provides a generic interface for calculating nonbonded interactions. Clients only need
* to provide the code for evaluating a single interaction and the list of parameters it depends on.
* A complete kernel is then synthesized using an appropriate algorithm to evaluate all interactions on
* all atoms. Call addInteraction() to define a nonbonded interaction, and addParameter() to define
* per-particle parameters that the interaction depends on.
*
* During each force or energy evaluation, the following sequence of steps takes place:
*
* 1. Data structures (e.g. neighbor lists) are calculated to allow nonbonded interactions to be evaluated
* quickly.
*
* 2. calcForcesAndEnergy() is called on each ForceImpl in the System.
*
* 3. Finally, the default interaction kernel is invoked to calculate all interactions that were added
* to it.
*
* This sequence means that the default interaction kernel may depend on quantities that were calculated
* by ForceImpls during calcForcesAndEnergy().
*/
class OPENMM_EXPORT_COMMON NonbondedUtilities {
public:
/**
* Virtual destructor, so that an implementation can be destroyed through a pointer or
* reference to this interface.
*/
virtual ~NonbondedUtilities() {
}
/**
* Add a nonbonded interaction to be evaluated by the default interaction kernel.
*
* @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
* @param usesExclusions specifies whether this interaction uses exclusions. If this is true, it must have identical exclusions to every other interaction.
* @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false)
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
*/
virtual void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup) = 0;
/**
* Add a per-atom parameter that the default interaction kernel may depend on.
*
* @param parameter the description of the per-atom parameter to add
*/
virtual void addParameter(ComputeParameterInfo parameter) = 0;
/**
* Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
*
* @param parameter the description of the array to pass as an argument
*/
virtual void addArgument(ComputeParameterInfo parameter) = 0;
/**
* Register that the interaction kernel will be computing the derivative of the potential energy
* with respect to a parameter.
*
* @param param the name of the parameter
* @return the variable that will be used to accumulate the derivative. Any code you pass to addInteraction() should
* add its contributions to this variable.
*/
virtual std::string addEnergyParameterDerivative(const std::string& param) = 0;
/**
* Get the number of force buffers required for nonbonded forces.
*
* @return the number of force buffers
*/
virtual int getNumForceBuffers() const = 0;
/**
* Get whether a cutoff is being used.
*
* @return true if a cutoff is being used
*/
virtual bool getUseCutoff() = 0;
/**
* Get whether periodic boundary conditions are being used.
*
* @return true if periodic boundary conditions are being used
*/
virtual bool getUsePeriodic() = 0;
/**
* Get the number of thread blocks used for computing nonbonded forces.
*
* @return the number of thread blocks
*/
virtual int getNumForceThreadBlocks() = 0;
/**
* Get the size of each thread block used for computing nonbonded forces.
*
* @return the thread block size
*/
virtual int getForceThreadBlockSize() = 0;
/**
* Get the maximum cutoff distance used by any interaction.
*
* @return the maximum cutoff distance
*/
virtual double getMaxCutoffDistance() = 0;
/**
* Given a nonbonded cutoff, get the padded cutoff distance used in computing
* the neighbor list.
*
* @param cutoff the nonbonded cutoff distance
* @return the padded cutoff distance
*/
virtual double padCutoff(double cutoff) = 0;
/**
* Get the array containing the center of each atom block.
*/
virtual ArrayInterface& getBlockCenters() = 0;
/**
* Get the array containing the dimensions of each atom block.
*/
virtual ArrayInterface& getBlockBoundingBoxes() = 0;
/**
* Get the array whose first element contains the number of tiles with interactions.
*/
virtual ArrayInterface& getInteractionCount() = 0;
/**
* Get the array containing tiles with interactions.
*/
virtual ArrayInterface& getInteractingTiles() = 0;
/**
* Get the array containing the atoms in each tile with interactions.
*/
virtual ArrayInterface& getInteractingAtoms() = 0;
/**
* Get the array containing exclusion flags.
*/
virtual ArrayInterface& getExclusions() = 0;
/**
* Get the array containing tiles with exclusions.
*/
virtual ArrayInterface& getExclusionTiles() = 0;
/**
* Get the array containing the index into the exclusion array for each tile.
*/
virtual ArrayInterface& getExclusionIndices() = 0;
/**
* Get the array listing where the exclusion data starts for each row.
*/
virtual ArrayInterface& getExclusionRowIndices() = 0;
/**
* Get the array containing a flag for whether the neighbor list was rebuilt
* on the most recent call to prepareInteractions().
*
* NOTE(review): prepareInteractions() is not declared in this interface; presumably it is
* provided by the platform-specific implementations. Confirm before relying on it here.
*/
virtual ArrayInterface& getRebuildNeighborList() = 0;
};
} // namespace OpenMM
#endif /*OPENMM_NONBONDEDUTILITIES_H_*/
#ifndef OPENMM_WINDOWSEXPORTOPENCL_H_
#define OPENMM_WINDOWSEXPORTOPENCL_H_
/*
* Shared libraries are messy in Visual Studio. We have to distinguish three
* cases:
* (1) this header is being used to build the OpenMM shared library
* (dllexport)
* (2) this header is being used by a *client* of the OpenMM shared
* library (dllimport)
* (3) we are building the OpenMM static library, or the client is
* being compiled with the expectation of linking with the
* OpenMM static library (nothing special needed)
* In the CMake script for building this library, we define one of the symbols
* OPENMM_OPENCL_BUILDING_{SHARED|STATIC}_LIBRARY
* Client code normally has no special symbol defined, in which case we'll
* assume it wants to use the shared library. However, if the client defines
* the symbol OPENMM_OPENCL_USE_STATIC_LIBRARIES we'll suppress the dllimport so
* that the client code can be linked with static libraries. Note that both
* the library symbols and the client symbol tested below are specific to this
* library, so they affect only OPENMM_EXPORT_OPENCL and other libraries can
* be clients of this one. However, we are assuming all-static or all-shared.
*
* NOTE(review): an earlier version of this comment named the client symbol
* OPENMM_USE_STATIC_LIBRARIES, which the code below does not test. Confirm
* whether that generic symbol was also meant to be honored here.
*/
#ifdef _MSC_VER
// We don't want to hear about how sprintf is "unsafe".
#pragma warning(disable:4996)
// Keep MS VC++ quiet about lack of dll export of private members.
#pragma warning(disable:4251)
#if defined(OPENMM_OPENCL_BUILDING_SHARED_LIBRARY)
#define OPENMM_EXPORT_OPENCL __declspec(dllexport)
#elif defined(OPENMM_OPENCL_BUILDING_STATIC_LIBRARY) || defined(OPENMM_OPENCL_USE_STATIC_LIBRARIES)
#define OPENMM_EXPORT_OPENCL
#else
#define OPENMM_EXPORT_OPENCL __declspec(dllimport) // i.e., a client of a shared library
#endif
#else
#define OPENMM_EXPORT_OPENCL // Linux, Mac
#endif
#endif // OPENMM_WINDOWSEXPORTOPENCL_H_
#ifndef OPENMM_WINDOWSEXPORTCOMMON_H_
#define OPENMM_WINDOWSEXPORTCOMMON_H_
/*
* Shared libraries are messy in Visual Studio. We have to distinguish three
* cases:
* (1) this header is being used to build the OpenMM shared library
* (dllexport)
* (2) this header is being used by a *client* of the OpenMM shared
* library (dllimport)
* (3) we are building the OpenMM static library, or the client is
* being compiled with the expectation of linking with the
* OpenMM static library (nothing special needed)
* In the CMake script for building this library, we define one of the symbols
* OPENMM_COMMON_BUILDING_{SHARED|STATIC}_LIBRARY
* Client code normally has no special symbol defined, in which case we'll
* assume it wants to use the shared library. However, if the client defines
* the symbol OPENMM_COMMON_USE_STATIC_LIBRARIES we'll suppress the dllimport so
* that the client code can be linked with static libraries. Note that both
* the library symbols and the client symbol tested below are specific to this
* library, so they affect only OPENMM_EXPORT_COMMON and other libraries can
* be clients of this one. However, we are assuming all-static or all-shared.
*
* NOTE(review): an earlier version of this comment named the client symbol
* OPENMM_USE_STATIC_LIBRARIES, which the code below does not test. Confirm
* whether that generic symbol was also meant to be honored here.
*/
#ifdef _MSC_VER
// We don't want to hear about how sprintf is "unsafe".
#pragma warning(disable:4996)
// Keep MS VC++ quiet about lack of dll export of private members.
#pragma warning(disable:4251)
#if defined(OPENMM_COMMON_BUILDING_SHARED_LIBRARY)
#define OPENMM_EXPORT_COMMON __declspec(dllexport)
#elif defined(OPENMM_COMMON_BUILDING_STATIC_LIBRARY) || defined(OPENMM_COMMON_USE_STATIC_LIBRARIES)
#define OPENMM_EXPORT_COMMON
#else
#define OPENMM_EXPORT_COMMON __declspec(dllimport) // i.e., a client of a shared library
#endif
#else
#define OPENMM_EXPORT_COMMON // Linux, Mac
#endif
#endif // OPENMM_WINDOWSEXPORTCOMMON_H_
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -24,7 +24,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLDrudeKernelSources.h"
#include "CommonKernelSources.h"
using namespace OpenMM;
using namespace std;
......
#ifndef OPENMM_OPENCLDRUDEKERNELSOURCES_H_
#define OPENMM_OPENCLDRUDEKERNELSOURCES_H_
#ifndef OPENMM_COMMONKERNELSOURCES_H_
#define OPENMM_COMMONKERNELSOURCES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2010 Stanford University and the Authors. *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -27,21 +27,22 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/windowsExportCommon.h"
#include <string>
namespace OpenMM {
/**
* This class is a central holding place for the source code of OpenCL kernels.
* The CMake build script inserts declarations into it based on the .cu files in the
* This class is a central holding place for the source code of common kernels.
* The CMake build script inserts declarations into it based on the .cc files in the
* kernels subfolder.
*/
class OpenCLDrudeKernelSources {
class OPENMM_EXPORT_COMMON CommonKernelSources {
public:
@CL_FILE_DECLARATIONS@
@KERNEL_FILE_DECLARATIONS@
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLDRUDEKERNELSOURCES_H_*/
#endif /*OPENMM_COMMONKERNELSOURCES_H_*/
This source diff could not be displayed because it is too large. You can view the blob instead.
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/ComputeArray.h"
#include "openmm/common/ComputeContext.h"
using namespace OpenMM;
// Create an empty wrapper. No backing array exists until initialize() is called.
ComputeArray::ComputeArray()
    : impl(NULL)
{
}
// Release the backing array, if one was ever created.
ComputeArray::~ComputeArray() {
    // Deleting a null pointer is a no-op, so no explicit check is required.
    delete impl;
}
// Return the backing array; throws OpenMMException if initialize() has not been called.
ArrayInterface& ComputeArray::getArray() {
    if (!isInitialized())
        throw OpenMMException("ComputeArray has not been initialized");
    ArrayInterface& array = *impl;
    return array;
}
// Create the backing array through the context and initialize it.
//
// @param context      the context that creates the array and to which it will belong
// @param size         the number of elements in the array
// @param elementSize  the size of each element, passed through to the backing array
// @param name         the name of the array
// @throws OpenMMException if this object has already been initialized
void ComputeArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) {
    if (impl != NULL)
        throw OpenMMException("The array "+getName()+" has already been initialized");
    impl = context.createArray();
    try {
        impl->initialize(context, size, elementSize, name);
    }
    catch (...) {
        // Do not keep a half-constructed array around: otherwise isInitialized() would
        // report true and every later call to initialize() would be rejected.
        delete impl;
        impl = NULL;
        throw;
    }
}
void ComputeArray::resize(int size) {
if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized");
impl->resize(size);
}
// Report whether initialize() has created the backing array.
bool ComputeArray::isInitialized() const {
    const bool hasBackingArray = (impl != NULL);
    return hasBackingArray;
}
int ComputeArray::getSize() const {
if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized");
return impl->getSize();
}
int ComputeArray::getElementSize() const {
if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized");
return impl->getElementSize();
}
const std::string& ComputeArray::getName() const {
if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized");
return impl->getName();
}
// Return the context reported by the backing array; throws if not yet initialized.
ComputeContext& ComputeArray::getContext() {
    if (!isInitialized())
        throw OpenMMException("ComputeArray has not been initialized");
    ComputeContext& owner = impl->getContext();
    return owner;
}
void ComputeArray::upload(const void* data, bool blocking) {
if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized");
impl->upload(data, blocking);
}
void ComputeArray::download(void* data, bool blocking) const {
if (impl == NULL)
throw OpenMMException("ComputeArray has not been initialized");
impl->download(data, blocking);
}
// Forward a copy into `dest` to the backing array; throws if not yet initialized.
void ComputeArray::copyTo(ArrayInterface& dest) const {
    if (!isInitialized())
        throw OpenMMException("ComputeArray has not been initialized");
    impl->copyTo(dest);
    return;
}
\ No newline at end of file
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/ComputeContext.h"
#include "openmm/System.h"
#include "openmm/VirtualSite.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ThreadPool.h"
#include "hilbert.h"
#include <algorithm>
#include <cmath>
#include <set>
#include <sstream>
#include <utility>
using namespace OpenMM;
using namespace std;
// Set up the bookkeeping shared by all compute contexts and create the worker thread.
ComputeContext::ComputeContext(const System& system)
    : system(system),
      time(0.0),
      stepCount(0),
      computeForceCount(0),
      stepsSinceReorder(99999),
      atomsWereReordered(false),
      forcesValid(false),
      thread(NULL)
{
    // The worker thread is created in the body, after every member has been initialized.
    thread = new WorkThread();
}
// Destroy the worker thread created by the constructor.
ComputeContext::~ComputeContext() {
    // Deleting a null pointer is a no-op, so no explicit check is required.
    delete thread;
}
// Append a ComputeForceInfo to the list of forces known to this context.
void ComputeContext::addForce(ComputeForceInfo* force) {
    forces.insert(forces.end(), force);
}
/**
 * Replace every occurrence of each key in `replacements` with its value, but only where the
 * key appears as a complete symbol (not as part of a longer identifier).
 *
 * A symbol character is a letter, a digit, or an underscore. An occurrence is replaced only if
 * the characters immediately before and after it (when present) are not symbol characters.
 * Replacements are applied one key at a time, in the map's iteration order, and text inserted
 * for one key is not rescanned for that same key. Empty keys are ignored.
 *
 * @param input         the text to process
 * @param replacements  maps each symbol to the text that should replace it
 * @return a copy of `input` with the replacements applied
 */
string ComputeContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const {
    // A stateless predicate replaces the lazily filled function-static set, whose
    // "fill on first use" was not safe if two threads made the first call concurrently.
    auto isSymbolChar = [](char c) {
        return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
    };
    string result = input;
    for (auto& pair : replacements) {
        const string& symbol = pair.first;
        const string& replacement = pair.second;
        if (symbol.empty())
            continue; // An empty key matches everywhere and could never terminate.
        // Use the string's own index type so the search position is never narrowed to int.
        string::size_type index = 0;
        while ((index = result.find(symbol, index)) != string::npos) {
            const string::size_type end = index+symbol.size();
            const bool startsSymbol = (index == 0 || !isSymbolChar(result[index-1]));
            const bool endsSymbol = (end == result.size() || !isSymbolChar(result[end]));
            if (startsSymbol && endsSymbol) {
                // We have found a complete symbol, not part of a longer symbol.
                result.replace(index, symbol.size(), replacement);
                index += replacement.size();
            }
            else
                index++;
        }
    }
    return result;
}
// Format a floating point value in scientific notation. In double precision mode the stream
// precision is 16; otherwise it is 8 and an "f" suffix is appended.
string ComputeContext::doubleToString(double value) const {
    stringstream text;
    if (getUseDoublePrecision())
        text.precision(16);
    else
        text.precision(8);
    text << scientific << value;
    if (!getUseDoublePrecision())
        text << "f";
    return text.str();
}
// Format an integer as text using stream insertion.
string ComputeContext::intToString(int value) const {
    stringstream text;
    text << value;
    const string formatted = text.str();
    return formatted;
}
/**
 * This class ensures that atom reordering doesn't break virtual sites.
 *
 * Each virtual site in the System is reported as one particle group consisting of the site
 * itself followed by the particles it depends on. Two groups are considered identical only
 * if the sites have the same dynamic type and the same recorded weights.
 *
 * NOTE(review): weights are recorded only for TwoParticleAverageSite, ThreeParticleAverageSite
 * and OutOfPlaneSite. Any other VirtualSite subclass gets an empty weight list, so two sites
 * of such a type always compare as identical here. Confirm that this is intended.
 */
class ComputeContext::VirtualSiteInfo : public ComputeForceInfo {
public:
    /**
     * Record the type, particles and weights of every virtual site in the System.
     */
    VirtualSiteInfo(const System& system) {
        for (int i = 0; i < system.getNumParticles(); i++) {
            if (!system.isVirtualSite(i))
                continue;
            const VirtualSite& vsite = system.getVirtualSite(i);
            siteTypes.push_back(&typeid(vsite));
            // The group lists the virtual site first, then the particles it depends on.
            vector<int> particles;
            particles.push_back(i);
            for (int j = 0; j < vsite.getNumParticles(); j++)
                particles.push_back(vsite.getParticle(j));
            siteParticles.push_back(particles);
            vector<double> weights;
            if (const TwoParticleAverageSite* site = dynamic_cast<const TwoParticleAverageSite*>(&vsite)) {
                // A two particle average.
                weights.push_back(site->getWeight(0));
                weights.push_back(site->getWeight(1));
            }
            else if (const ThreeParticleAverageSite* site = dynamic_cast<const ThreeParticleAverageSite*>(&vsite)) {
                // A three particle average.
                weights.push_back(site->getWeight(0));
                weights.push_back(site->getWeight(1));
                weights.push_back(site->getWeight(2));
            }
            else if (const OutOfPlaneSite* site = dynamic_cast<const OutOfPlaneSite*>(&vsite)) {
                // An out of plane site.
                weights.push_back(site->getWeight12());
                weights.push_back(site->getWeight13());
                weights.push_back(site->getWeightCross());
            }
            siteWeights.push_back(weights);
        }
    }
    /**
     * Get the number of particle groups, which is the number of virtual sites found.
     */
    int getNumParticleGroups() {
        return siteTypes.size();
    }
    /**
     * Get the particles in a group: the virtual site followed by the particles it depends on.
     */
    void getParticlesInGroup(int index, std::vector<int>& particles) {
        particles = siteParticles[index];
    }
    /**
     * Get whether two virtual sites have the same type and the same weights.
     */
    bool areGroupsIdentical(int group1, int group2) {
        // Compare the type_info objects rather than their addresses: the language does not
        // guarantee that typeid yields the same object every time for a given type.
        if (!(*siteTypes[group1] == *siteTypes[group2]))
            return false;
        // Vector equality checks the sizes and then compares the weights element by element.
        return siteWeights[group1] == siteWeights[group2];
    }
private:
    vector<const type_info*> siteTypes;
    vector<vector<int> > siteParticles;
    vector<vector<double> > siteWeights;
};
/**
 * Identify the molecules in the system and sort them into groups of identical molecules.
 *
 * On the first call (when no molecule groups exist yet) this registers a VirtualSiteInfo,
 * builds the list of molecules from constraints and force particle groups, and records which
 * constraints and groups belong to each molecule. On every call it then rebuilds
 * moleculeGroups by comparing each molecule against the unique molecules found so far.
 */
void ComputeContext::findMoleculeGroups() {
    // The first time this is called, we need to identify all the molecules in the system.
    if (moleculeGroups.size() == 0) {
        // Add a ForceInfo that makes sure reordering doesn't break virtual sites.
        addForce(new VirtualSiteInfo(system));
        // First make a list of every other atom to which each atom is connected by a constraint or force group.
        vector<vector<int> > atomBonds(system.getNumParticles());
        for (int i = 0; i < system.getNumConstraints(); i++) {
            int particle1, particle2;
            double distance;
            system.getConstraintParameters(i, particle1, particle2, distance);
            atomBonds[particle1].push_back(particle2);
            atomBonds[particle2].push_back(particle1);
        }
        for (auto force : forces) {
            for (int j = 0; j < force->getNumParticleGroups(); j++) {
                vector<int> particles;
                force->getParticlesInGroup(j, particles);
                for (int k = 0; k < (int) particles.size(); k++)
                    for (int m = 0; m < (int) particles.size(); m++)
                        if (k != m)
                            atomBonds[particles[k]].push_back(particles[m]);
            }
        }
        // Now identify atoms by which molecule they belong to.
        vector<vector<int> > atomIndices = ContextImpl::findMolecules(numAtoms, atomBonds);
        int numMolecules = atomIndices.size();
        vector<int> atomMolecule(numAtoms);
        for (int i = 0; i < (int) atomIndices.size(); i++)
            for (int j = 0; j < (int) atomIndices[i].size(); j++)
                atomMolecule[atomIndices[i][j]] = i;
        // Construct a description of each molecule.
        molecules.resize(numMolecules);
        for (int i = 0; i < numMolecules; i++) {
            molecules[i].atoms = atomIndices[i];
            molecules[i].groups.resize(forces.size());
        }
        for (int i = 0; i < system.getNumConstraints(); i++) {
            int particle1, particle2;
            double distance;
            system.getConstraintParameters(i, particle1, particle2, distance);
            molecules[atomMolecule[particle1]].constraints.push_back(i);
        }
        for (int i = 0; i < (int) forces.size(); i++)
            for (int j = 0; j < forces[i]->getNumParticleGroups(); j++) {
                vector<int> particles;
                forces[i]->getParticlesInGroup(j, particles);
                // A group is assigned to the molecule that contains its first particle.
                if (particles.size() > 0)
                    molecules[atomMolecule[particles[0]]].groups[i].push_back(j);
            }
    }
    // Sort them into groups of identical molecules.
    vector<Molecule> uniqueMolecules;
    vector<vector<int> > moleculeInstances;
    vector<vector<int> > moleculeOffsets;
    for (int molIndex = 0; molIndex < (int) molecules.size(); molIndex++) {
        Molecule& mol = molecules[molIndex];
        // See if it is identical to another molecule.
        bool isNew = true;
        for (int j = 0; j < (int) uniqueMolecules.size() && isNew; j++) {
            Molecule& mol2 = uniqueMolecules[j];
            bool identical = (mol.atoms.size() == mol2.atoms.size() && mol.constraints.size() == mol2.constraints.size());
            // See if the atoms are identical.
            int atomOffset = mol2.atoms[0]-mol.atoms[0];
            for (int i = 0; i < (int) mol.atoms.size() && identical; i++) {
                if (mol.atoms[i] != mol2.atoms[i]-atomOffset || system.getParticleMass(mol.atoms[i]) != system.getParticleMass(mol2.atoms[i]))
                    identical = false;
                // Stop asking the forces as soon as one difference has been found.
                for (int k = 0; k < (int) forces.size() && identical; k++)
                    if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i]))
                        identical = false;
            }
            // See if the constraints are identical.
            for (int i = 0; i < (int) mol.constraints.size() && identical; i++) {
                int c1particle1, c1particle2, c2particle1, c2particle2;
                double distance1, distance2;
                system.getConstraintParameters(mol.constraints[i], c1particle1, c1particle2, distance1);
                system.getConstraintParameters(mol2.constraints[i], c2particle1, c2particle2, distance2);
                if (c1particle1 != c2particle1-atomOffset || c1particle2 != c2particle2-atomOffset || distance1 != distance2)
                    identical = false;
            }
            // See if the force groups are identical.
            for (int i = 0; i < (int) forces.size() && identical; i++) {
                if (mol.groups[i].size() != mol2.groups[i].size())
                    identical = false;
                for (int k = 0; k < (int) mol.groups[i].size() && identical; k++) {
                    if (!forces[i]->areGroupsIdentical(mol.groups[i][k], mol2.groups[i][k])) {
                        // The groups differ, so there is no point in comparing their particles
                        // (and they are not guaranteed to contain the same number of them).
                        identical = false;
                        break;
                    }
                    vector<int> p1, p2;
                    forces[i]->getParticlesInGroup(mol.groups[i][k], p1);
                    forces[i]->getParticlesInGroup(mol2.groups[i][k], p2);
                    // Never index p2 beyond its end: groups of different size cannot match.
                    if (p1.size() != p2.size())
                        identical = false;
                    for (int m = 0; m < (int) p1.size() && identical; m++)
                        if (p1[m] != p2[m]-atomOffset)
                            identical = false;
                }
            }
            if (identical) {
                moleculeInstances[j].push_back(molIndex);
                moleculeOffsets[j].push_back(mol.atoms[0]);
                isNew = false;
            }
        }
        if (isNew) {
            // This molecule starts a new group, with itself as the first instance.
            uniqueMolecules.push_back(mol);
            moleculeInstances.push_back(vector<int>());
            moleculeInstances.back().push_back(molIndex);
            moleculeOffsets.push_back(vector<int>());
            moleculeOffsets.back().push_back(mol.atoms[0]);
        }
    }
    moleculeGroups.resize(moleculeInstances.size());
    for (int i = 0; i < (int) moleculeInstances.size(); i++) {
        moleculeGroups[i].instances = moleculeInstances[i];
        moleculeGroups[i].offsets = moleculeOffsets[i];
        // Store atom indices relative to the first atom of the representative molecule.
        vector<int>& atoms = uniqueMolecules[i].atoms;
        moleculeGroups[i].atoms.resize(atoms.size());
        for (int j = 0; j < (int) atoms.size(); j++)
            moleculeGroups[i].atoms[j] = atoms[j]-atoms[0];
    }
}
// Check each registered force in turn, stopping at the first one that causes the
// molecule groups to be invalidated and rebuilt.
void ComputeContext::invalidateMolecules() {
    for (int index = 0; index < forces.size(); index++) {
        const bool wasInvalidated = invalidateMolecules(forces[index]);
        if (wasInvalidated)
            break;
    }
}
/**
 * Check whether the molecules currently grouped as identical are still identical according
 * to one force. If they are not, restore the atoms to their original order, rebuild the
 * molecule groups, notify the reorder listeners and reorder the atoms again.
 *
 * @param force  the force whose particles and particle groups should be re-checked
 * @return true if the molecule groups were invalidated and rebuilt, false if nothing changed
 *         (which includes the cases of no atoms or no cutoff in use)
 */
bool ComputeContext::invalidateMolecules(ComputeForceInfo* force) {
    if (numAtoms == 0 || !getNonbondedUtilities().getUseCutoff())
        return false;
    bool valid = true;
    int forceIndex = -1;
    for (int i = 0; i < forces.size(); i++)
        if (forces[i] == force)
            forceIndex = i;
    // NOTE(review): `valid` is a plain bool that the worker lambda both reads and clears.
    // If the pool really runs this on several threads at once, this is an unsynchronized
    // shared flag - confirm whether it should be atomic.
    getThreadPool().execute([&] (ThreadPool& threads, int threadIndex) {
        for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
            MoleculeGroup& mol = moleculeGroups[group];
            vector<int>& instances = mol.instances;
            vector<int>& offsets = mol.offsets;
            vector<int>& atoms = mol.atoms;
            int numMolecules = instances.size();
            Molecule& m1 = molecules[instances[0]];
            int offset1 = offsets[0];
            int numThreads = threads.getNumThreads();
            // Each thread checks its own slice of the instances. Instance 0 is the reference
            // that all the others are compared against, so it is never checked itself.
            int start = max(1, threadIndex*numMolecules/numThreads);
            int end = (threadIndex+1)*numMolecules/numThreads;
            for (int j = start; j < end; j++) {
                // See if the atoms are identical.
                Molecule& m2 = molecules[instances[j]];
                int offset2 = offsets[j];
                for (int i = 0; i < (int) atoms.size() && valid; i++) {
                    if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
                        valid = false;
                }
                // See if the force groups are identical.
                if (valid && forceIndex > -1) {
                    for (int k = 0; k < (int) m1.groups[forceIndex].size() && valid; k++)
                        if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
                            valid = false;
                }
            }
        }
    });
    getThreadPool().waitForThreads();
    if (valid)
        return false;
    // The list of which molecules are identical is no longer valid. We need to restore the
    // atoms to their original order, rebuild the list of identical molecules, and sort them
    // again.
    vector<mm_int4> newCellOffsets(numAtoms);
    if (getUseDoublePrecision()) {
        vector<mm_double4> oldPosq(paddedNumAtoms);
        vector<mm_double4> newPosq(paddedNumAtoms, mm_double4(0,0,0,0));
        vector<mm_double4> oldVelm(paddedNumAtoms);
        vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0));
        getPosq().download(oldPosq);
        getVelm().download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        getPosq().upload(newPosq);
        getVelm().upload(newVelm);
    }
    else if (getUseMixedPrecision()) {
        vector<mm_float4> oldPosq(paddedNumAtoms);
        vector<mm_float4> newPosq(paddedNumAtoms, mm_float4(0,0,0,0));
        vector<mm_float4> oldPosqCorrection(paddedNumAtoms);
        vector<mm_float4> newPosqCorrection(paddedNumAtoms, mm_float4(0,0,0,0));
        vector<mm_double4> oldVelm(paddedNumAtoms);
        vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0));
        getPosq().download(oldPosq);
        // The correction array has to be read back as well. Otherwise the permuted copy
        // uploaded below would not contain the values currently on the device.
        getPosqCorrection().download(oldPosqCorrection);
        getVelm().download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newPosqCorrection[index] = oldPosqCorrection[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        getPosq().upload(newPosq);
        getPosqCorrection().upload(newPosqCorrection);
        getVelm().upload(newVelm);
    }
    else {
        vector<mm_float4> oldPosq(paddedNumAtoms);
        vector<mm_float4> newPosq(paddedNumAtoms, mm_float4(0,0,0,0));
        vector<mm_float4> oldVelm(paddedNumAtoms);
        vector<mm_float4> newVelm(paddedNumAtoms, mm_float4(0,0,0,0));
        getPosq().download(oldPosq);
        getVelm().download(oldVelm);
        for (int i = 0; i < numAtoms; i++) {
            int index = atomIndex[i];
            newPosq[index] = oldPosq[i];
            newVelm[index] = oldVelm[i];
            newCellOffsets[index] = posCellOffsets[i];
        }
        getPosq().upload(newPosq);
        getVelm().upload(newVelm);
    }
    // The atoms are back in their original order, so the index map is the identity again.
    for (int i = 0; i < numAtoms; i++) {
        atomIndex[i] = i;
        posCellOffsets[i] = newCellOffsets[i];
    }
    getAtomIndexArray().upload(atomIndex);
    findMoleculeGroups();
    for (auto listener : reorderListeners)
        listener->execute();
    reorderAtoms();
    return true;
}
/**
 * Decide whether the atoms should be reordered on this call and, if so, dispatch to the
 * reorderAtomsImpl() instantiation matching the precision mode.  Nothing is reordered (and the
 * counter of calls since the last reorder is incremented) when there are no atoms, when the
 * nonbonded utilities do not use a cutoff, or when fewer than 250 calls have elapsed since
 * the previous reorder.
 */
void ComputeContext::reorderAtoms() {
    atomsWereReordered = false;
    const bool shouldReorder = (numAtoms != 0 && getNonbondedUtilities().getUseCutoff() && stepsSinceReorder >= 250);
    if (!shouldReorder) {
        ++stepsSinceReorder;
        return;
    }
    stepsSinceReorder = 0;
    atomsWereReordered = true;

    // Template arguments are <position scalar, position vector, velocity scalar, velocity vector>.
    if (getUseDoublePrecision()) {
        reorderAtomsImpl<double, mm_double4, double, mm_double4>();
        return;
    }
    if (getUseMixedPrecision()) {
        reorderAtomsImpl<float, mm_float4, double, mm_double4>();
        return;
    }
    reorderAtomsImpl<float, mm_float4, float, mm_float4>();
}
/**
 * Reorder the atoms so that molecules are sorted by the spatial bin containing their center.
 * Positions, velocities, the mixed precision position corrections (when in use), the cell
 * offsets and the atom index array are all permuted consistently, the new arrays are uploaded,
 * and every registered reorder listener is executed afterwards.
 *
 * Real/Real4 are the scalar and 4-component types used for the position array, and
 * Mixed/Mixed4 the types used for the velocity array.
 *
 * The caller is expected to guarantee numAtoms > 0, since element 0 of the position array
 * is read unconditionally.
 *
 * @throws OpenMMException if any component of a molecule's center is NaN
 */
template <class Real, class Real4, class Mixed, class Mixed4>
void ComputeContext::reorderAtomsImpl() {
    // Find the range of positions and the number of bins along each axis.

    vector<Real4> oldPosq(paddedNumAtoms);
    vector<Real4> oldPosqCorrection(paddedNumAtoms);
    vector<Mixed4> oldVelm(paddedNumAtoms);
    getPosq().download(oldPosq);
    getVelm().download(oldVelm);
    if (getUseMixedPrecision())
        getPosqCorrection().download(oldPosqCorrection);
    Real minx = oldPosq[0].x, maxx = oldPosq[0].x;
    Real miny = oldPosq[0].y, maxy = oldPosq[0].y;
    Real minz = oldPosq[0].z, maxz = oldPosq[0].z;
    Vec3 periodicBoxX, periodicBoxY, periodicBoxZ;
    getPeriodicBoxVectors(periodicBoxX, periodicBoxY, periodicBoxZ);
    Vec3 invPeriodicBoxSize(1.0/periodicBoxX[0], 1.0/periodicBoxY[1], 1.0/periodicBoxZ[2]);
    if (getNonbondedUtilities().getUsePeriodic()) {
        // With periodic boundaries the range is simply the box itself.
        minx = miny = minz = 0.0;
        maxx = periodicBoxX[0];
        maxy = periodicBoxY[1];
        maxz = periodicBoxZ[2];
    }
    else {
        for (int i = 1; i < numAtoms; i++) {
            const Real4& pos = oldPosq[i];
            minx = min(minx, pos.x);
            maxx = max(maxx, pos.x);
            miny = min(miny, pos.y);
            maxy = max(maxy, pos.y);
            minz = min(minz, pos.z);
            maxz = max(maxz, pos.z);
        }
    }

    // Loop over each group of identical molecules and reorder them.

    vector<int> originalIndex(numAtoms);
    vector<Real4> newPosq(paddedNumAtoms, Real4(0,0,0,0));
    vector<Real4> newPosqCorrection(paddedNumAtoms, Real4(0,0,0,0));
    vector<Mixed4> newVelm(paddedNumAtoms, Mixed4(0,0,0,0));
    vector<mm_int4> newCellOffsets(numAtoms);
    for (auto& mol : moleculeGroups) {
        // Find the center of each molecule.

        int numMolecules = mol.offsets.size();
        vector<int>& atoms = mol.atoms;
        vector<Real4> molPos(numMolecules);
        Real invNumAtoms = (Real) (1.0/atoms.size());
        for (int i = 0; i < numMolecules; i++) {
            molPos[i].x = 0.0f;
            molPos[i].y = 0.0f;
            molPos[i].z = 0.0f;
            for (int j = 0; j < (int)atoms.size(); j++) {
                int atom = atoms[j]+mol.offsets[i];
                const Real4& pos = oldPosq[atom];
                molPos[i].x += pos.x;
                molPos[i].y += pos.y;
                molPos[i].z += pos.z;
            }
            molPos[i].x *= invNumAtoms;
            molPos[i].y *= invNumAtoms;
            molPos[i].z *= invNumAtoms;

            // A value compares unequal to itself only when it is NaN.  All three components
            // must be checked: a NaN in y or z would otherwise flow into the integer casts
            // used for wrapping and binning below.
            if (molPos[i].x != molPos[i].x || molPos[i].y != molPos[i].y || molPos[i].z != molPos[i].z)
                throw OpenMMException("Particle coordinate is nan");
        }
        if (getNonbondedUtilities().getUsePeriodic()) {
            // Move each molecule position into the same box.

            for (int i = 0; i < numMolecules; i++) {
                Real4 center = molPos[i];

                // Wrap z first, then y, then x, subtracting whole box vectors each time.
                int zcell = (int) floor(center.z*invPeriodicBoxSize[2]);
                center.x -= zcell*periodicBoxZ[0];
                center.y -= zcell*periodicBoxZ[1];
                center.z -= zcell*periodicBoxZ[2];
                int ycell = (int) floor(center.y*invPeriodicBoxSize[1]);
                center.x -= ycell*periodicBoxY[0];
                center.y -= ycell*periodicBoxY[1];
                int xcell = (int) floor(center.x*invPeriodicBoxSize[0]);
                center.x -= xcell*periodicBoxX[0];
                if (xcell != 0 || ycell != 0 || zcell != 0) {
                    // The molecule moved: translate all of its atoms by the same amount and
                    // record the shift in the per-atom cell offsets.
                    Real dx = molPos[i].x-center.x;
                    Real dy = molPos[i].y-center.y;
                    Real dz = molPos[i].z-center.z;
                    molPos[i] = center;
                    for (int j = 0; j < (int) atoms.size(); j++) {
                        int atom = atoms[j]+mol.offsets[i];
                        Real4 p = oldPosq[atom];
                        p.x -= dx;
                        p.y -= dy;
                        p.z -= dz;
                        oldPosq[atom] = p;
                        posCellOffsets[atom].x -= xcell;
                        posCellOffsets[atom].y -= ycell;
                        posCellOffsets[atom].z -= zcell;
                    }
                }
            }
        }

        // Select a bin for each molecule, then sort them by bin.

        bool useHilbert = (numMolecules > 5000 || atoms.size() > 8); // For small systems, a simple zigzag curve works better than a Hilbert curve.
        Real binWidth;
        if (useHilbert)
            binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0); // Sized so bin coordinates are intended to fit the 8 bits per axis passed to hilbert_c2i().
        else
            binWidth = (Real) (0.2*getNonbondedUtilities().getMaxCutoffDistance());
        Real invBinWidth = (Real) (1.0/binWidth);
        int xbins = 1 + (int) ((maxx-minx)*invBinWidth);
        int ybins = 1 + (int) ((maxy-miny)*invBinWidth);
        vector<pair<int, int> > molBins(numMolecules);
        bitmask_t coords[3];
        for (int i = 0; i < numMolecules; i++) {
            int x = (int) ((molPos[i].x-minx)*invBinWidth);
            int y = (int) ((molPos[i].y-miny)*invBinWidth);
            int z = (int) ((molPos[i].z-minz)*invBinWidth);
            int bin;
            if (useHilbert) {
                coords[0] = x;
                coords[1] = y;
                coords[2] = z;
                bin = (int) hilbert_c2i(3, 8, coords);
            }
            else {
                // Zigzag ordering: reverse the direction of travel on odd rows and layers.
                int yodd = y&1;
                int zodd = z&1;
                bin = z*xbins*ybins;
                bin += (zodd ? ybins-y : y)*xbins;
                bin += (yodd ? xbins-x : x);
            }
            molBins[i] = pair<int, int>(bin, i);
        }
        sort(molBins.begin(), molBins.end());

        // Reorder the atoms.

        for (int i = 0; i < numMolecules; i++) {
            for (int atom : atoms) {
                int oldIndex = mol.offsets[molBins[i].second]+atom;
                int newIndex = mol.offsets[i]+atom;
                originalIndex[newIndex] = atomIndex[oldIndex];
                newPosq[newIndex] = oldPosq[oldIndex];
                if (getUseMixedPrecision())
                    newPosqCorrection[newIndex] = oldPosqCorrection[oldIndex];
                newVelm[newIndex] = oldVelm[oldIndex];
                newCellOffsets[newIndex] = posCellOffsets[oldIndex];
            }
        }
    }

    // Update the arrays.

    for (int i = 0; i < numAtoms; i++) {
        atomIndex[i] = originalIndex[i];
        posCellOffsets[i] = newCellOffsets[i];
    }
    getPosq().upload(newPosq);
    if (getUseMixedPrecision())
        getPosqCorrection().upload(newPosqCorrection);
    getVelm().upload(newVelm);
    getAtomIndexArray().upload(atomIndex);
    for (auto listener : reorderListeners)
        listener->execute();
}
/**
 * Register a listener.  Registered listeners have execute() invoked on them, in registration
 * order, each time the atoms are reordered.
 */
void ComputeContext::addReorderListener(ReorderListener* listener) {
    reorderListeners.emplace_back(listener);
}
/**
 * Append a computation to the list of pre-computations held by this context.
 */
void ComputeContext::addPreComputation(ForcePreComputation* computation) {
    preComputations.emplace_back(computation);
}
/**
 * Append a computation to the list of post-computations held by this context.
 */
void ComputeContext::addPostComputation(ForcePostComputation* computation) {
    postComputations.emplace_back(computation);
}
/**
 * References to the state a WorkThread shares with its worker: the task queue, the
 * waiting/finished flags, and the mutex and condition variables that coordinate access.
 * An instance is handed to the worker's entry point when the thread is created.
 */
struct ComputeContext::WorkThread::ThreadData {
    std::queue<ComputeContext::WorkTask*>& tasks;
    bool& waiting;
    bool& finished;
    pthread_mutex_t& queueLock;
    pthread_cond_t& waitForTaskCondition;
    pthread_cond_t& queueEmptyCondition;

    ThreadData(std::queue<ComputeContext::WorkTask*>& taskQueue, bool& waitingFlag, bool& finishedFlag,
               pthread_mutex_t& lock, pthread_cond_t& taskAvailable, pthread_cond_t& queueDrained) :
            tasks(taskQueue), waiting(waitingFlag), finished(finishedFlag), queueLock(lock),
            waitForTaskCondition(taskAvailable), queueEmptyCondition(queueDrained) {
    }
};
/**
 * Entry point of the worker thread.  Tasks are taken from the queue one at a time, executed
 * and deleted.  While the queue is empty the thread marks itself as waiting, signals
 * queueEmptyCondition and sleeps on waitForTaskCondition.  Once the finished flag is set and
 * the queue has been drained, the thread marks itself as waiting one last time and exits.
 *
 * Every access to the shared queue and flags is made while holding queueLock; the lock is
 * released only while a task is executing.
 *
 * @param args   pointer to a heap allocated ThreadData.  This function takes ownership of it
 *               and deletes it before returning.
 * @return always 0
 */
static void* threadBody(void* args) {
    ComputeContext::WorkThread::ThreadData& data = *reinterpret_cast<ComputeContext::WorkThread::ThreadData*>(args);
    pthread_mutex_lock(&data.queueLock);
    while (true) {
        // Sleep until there is something to do or we have been asked to shut down.
        while (data.tasks.empty() && !data.finished) {
            data.waiting = true;
            pthread_cond_signal(&data.queueEmptyCondition);
            pthread_cond_wait(&data.waitForTaskCondition, &data.queueLock);
        }
        if (data.tasks.empty())
            break; // finished was set and every queued task has been processed.
        data.waiting = false;
        ComputeContext::WorkTask* task = data.tasks.front();
        data.tasks.pop();

        // Run the task without holding the lock so new tasks can be queued in the meantime.
        pthread_mutex_unlock(&data.queueLock);
        task->execute();
        delete task;
        pthread_mutex_lock(&data.queueLock);
    }

    // Publish the final state under the lock so a thread blocked in flush() cannot miss it.
    data.waiting = true;
    pthread_cond_signal(&data.queueEmptyCondition);
    pthread_mutex_unlock(&data.queueLock);
    delete &data;
    return 0;
}
/**
 * Create the synchronization primitives and start the worker thread.  The thread initially
 * counts as waiting and not finished.
 *
 * @throws OpenMMException if the worker thread cannot be created
 */
ComputeContext::WorkThread::WorkThread() : waiting(true), finished(false) {
    pthread_mutex_init(&queueLock, NULL);
    pthread_cond_init(&waitForTaskCondition, NULL);
    pthread_cond_init(&queueEmptyCondition, NULL);

    // On success the worker thread owns this object and deletes it when it exits.
    ThreadData* data = new ThreadData(tasks, waiting, finished, queueLock, waitForTaskCondition, queueEmptyCondition);
    if (pthread_create(&thread, NULL, threadBody, data) != 0) {
        // No thread was started, so nothing else will release these resources, and the
        // destructor will not run for a partially constructed object.
        delete data;
        pthread_cond_destroy(&queueEmptyCondition);
        pthread_cond_destroy(&waitForTaskCondition);
        pthread_mutex_destroy(&queueLock);
        throw OpenMMException("Failed to create worker thread");
    }
}
/**
 * Ask the worker thread to stop, wait for it to exit, then destroy the mutex and
 * condition variables.
 */
ComputeContext::WorkThread::~WorkThread() {
// Set the finished flag under the lock and wake the worker in case it is blocked waiting for a task.
pthread_mutex_lock(&queueLock);
finished = true;
pthread_cond_broadcast(&waitForTaskCondition);
pthread_mutex_unlock(&queueLock);
// The primitives may only be destroyed once the thread that uses them has exited.
pthread_join(thread, NULL);
pthread_mutex_destroy(&queueLock);
pthread_cond_destroy(&waitForTaskCondition);
pthread_cond_destroy(&queueEmptyCondition);
}
/**
 * Append a task to the queue and wake the worker thread.  The worker deletes the task
 * after executing it, so the task must be heap allocated and not used by the caller afterwards.
 */
void ComputeContext::WorkThread::addTask(ComputeContext::WorkTask* task) {
pthread_mutex_lock(&queueLock);
tasks.push(task);
// Clear the flag immediately so a subsequent flush() blocks until this task has been processed.
waiting = false;
pthread_cond_signal(&waitForTaskCondition);
pthread_mutex_unlock(&queueLock);
}
/**
 * Return the current value of the waiting flag, which the worker sets when it finds the
 * task queue empty.
 * NOTE(review): the flag is read without holding queueLock; confirm callers tolerate a stale value.
 */
bool ComputeContext::WorkThread::isWaiting() {
return waiting;
}
/**
 * Return the current value of the finished flag, which is set when the WorkThread is destroyed.
 * NOTE(review): the flag is read without holding queueLock; confirm callers tolerate a stale value.
 */
bool ComputeContext::WorkThread::isFinished() {
return finished;
}
/**
 * Block the calling thread until the worker reports that it is waiting, i.e. until the
 * waiting flag is set.
 */
void ComputeContext::WorkThread::flush() {
pthread_mutex_lock(&queueLock);
// Re-check the flag after every wakeup; the worker signals queueEmptyCondition when it sets the flag.
while (!waiting)
pthread_cond_wait(&queueEmptyCondition, &queueLock);
pthread_mutex_unlock(&queueLock);
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Portions copyright (c) 2012-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -24,23 +24,23 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLForceInfo.h"
#include "openmm/common/ComputeForceInfo.h"
using namespace OpenMM;
using namespace std;
bool OpenCLForceInfo::areParticlesIdentical(int particle1, int particle2) {
bool ComputeForceInfo::areParticlesIdentical(int particle1, int particle2) {
return true;
}
int OpenCLForceInfo::getNumParticleGroups() {
int ComputeForceInfo::getNumParticleGroups() {
return 0;
}
void OpenCLForceInfo::getParticlesInGroup(int index, vector<int>& particles) {
void ComputeForceInfo::getParticlesInGroup(int index, vector<int>& particles) {
return;
}
bool OpenCLForceInfo::areGroupsIdentical(int group1, int group2) {
bool ComputeForceInfo::areGroupsIdentical(int group1, int group2) {
return true;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/ComputeParameterSet.h"
#include "openmm/OpenMMException.h"
#include <cmath>
#include <sstream>
using namespace OpenMM;
using namespace std;
/**
 * Create the device arrays that hold numParameters values for each of numObjects objects.
 *
 * Unless arrayPerParameter is set, parameters are packed into arrays whose elements hold four
 * values (while more than two parameters remain) and then two values (if more than one
 * remains).  Any parameters still left over each get an array with one value per element.
 * The arrays are named "param1", "param2", ... in creation order, and a ComputeParameterInfo
 * is recorded for each of them.
 *
 * @param context              the context in which to create the arrays
 * @param numParameters        the number of parameters stored per object
 * @param numObjects           the number of objects
 * @param name                 the name of this parameter set
 * @param arrayPerParameter    if true, every parameter gets its own single-value array
 * @param useDoublePrecision   if true, values are stored as double, otherwise as float
 */
ComputeParameterSet::ComputeParameterSet(ComputeContext& context, int numParameters, int numObjects, const string& name, bool arrayPerParameter, bool useDoublePrecision) :
        context(context), numParameters(numParameters), numObjects(numObjects), name(name) {
    elementSize = (useDoublePrecision ? sizeof(double) : sizeof(float));
    const string elementType = (useDoublePrecision ? "double" : "float");
    int remaining = numParameters;
    int arrayCount = 0;

    // Create one more array whose elements hold `width` values, and account for the
    // parameters it covers.
    auto appendArray = [&](int width) {
        std::stringstream arrayName;
        arrayName << "param" << (++arrayCount);
        arrays.push_back(context.createArray());
        arrays.back()->initialize(context, numObjects, elementSize*width, arrayName.str());
        remaining -= width;
    };

    if (!arrayPerParameter) {
        while (remaining > 2)
            appendArray(4);
        if (remaining > 1)
            appendArray(2);
    }
    while (remaining > 0)
        appendArray(1);

    for (int i = 0; i < (int) arrays.size(); i++) {
        ArrayInterface& array = *arrays[i];
        parameters.push_back(ComputeParameterInfo(array, array.getName(), elementType, array.getElementSize()/elementSize));
    }
}
/**
 * Delete every array that was created by the constructor.
 */
ComputeParameterSet::~ComputeParameterSet() {
    for (int i = 0; i < (int) arrays.size(); i++)
        delete arrays[i];
}
/**
 * Download the parameter values from the device arrays.
 *
 * @param values   on exit, values[i][j] holds parameter j of object i.  The vector is resized
 *                 to numObjects entries of numParameters values each.
 * @throws OpenMMException if sizeof(T) does not match the element size of this set, or if an
 *                 array has an unexpected element size
 */
template <class T>
void ComputeParameterSet::getParameterValues(vector<vector<T> >& values) {
    if (sizeof(T) != elementSize)
        throw OpenMMException("Called getParameterValues() with vector of wrong type");
    values.resize(numObjects);
    for (vector<T>& row : values)
        row.resize(numParameters);

    int firstParam = 0; // Index of the first parameter stored in the current array.
    for (ArrayInterface* array : arrays) {
        // Work out how many values each element of this array holds.
        const auto arrayElementSize = array->getElementSize();
        int width;
        if (arrayElementSize == 4*elementSize)
            width = 4;
        else if (arrayElementSize == 2*elementSize)
            width = 2;
        else if (arrayElementSize == elementSize)
            width = 1;
        else
            throw OpenMMException("Internal error: Unknown buffer type in ComputeParameterSet");

        vector<T> data(width*numObjects);
        array->download(data.data());
        for (int obj = 0; obj < numObjects; obj++) {
            const T* src = &data[width*obj];
            vector<T>& row = values[obj];
            row[firstParam] = src[0];

            // Trailing components of the last array may be padding rather than parameters.
            for (int k = 1; k < width && firstParam+k < numParameters; k++)
                row[firstParam+k] = src[k];
        }
        firstParam += width;
    }
}
template <class T>
void ComputeParameterSet::setParameterValues(const vector<vector<T> >& values) {
if (sizeof(T) != elementSize)
throw OpenMMException("Called setParameterValues() with vector of wrong type");
int base = 0;
for (int i = 0; i < (int) arrays.size(); i++) {
if (arrays[i]->getElementSize() == 4*elementSize) {
vector<T> data(4*numObjects);
for (int j = 0; j < numObjects; j++) {
data[4*j] = values[j][base];
if (base+1 < numParameters)
data[4*j+1] = values[j][base+1];
if (base+2 < numParameters)
data[4*j+2] = values[j][base+2];
if (base+3 < numParameters)
data[4*j+3] = values[j][base+3];
}
arrays[i]->upload(data.data());
base += 4;
}
else if (arrays[i]->getElementSize() == 2*elementSize) {
vector<T> data(2*numObjects);
for (int j = 0; j < numObjects; j++) {
data[2*j] = values[j][base];
if (base+1 < numParameters)
data[2*j+1] = values[j][base+1];
}
arrays[i]->upload(data.data());
base += 2;
}
else if (arrays[i]->getElementSize() == elementSize) {
vector<T> data(numObjects);
for (int j = 0; j < numObjects; j++)
data[j] = values[j][base];
arrays[i]->upload(data.data());
base++;
}
else
throw OpenMMException("Internal error: Unknown buffer type in ComputeParameterSet");
}
}
/**
 * Get the suffix identifying where a parameter is stored.  The result is the 1-based number
 * of the array holding the parameter, followed by extraSuffix, followed by a component
 * selector (".x", ".y", ".z" or ".w") when that array stores more than one value per element.
 *
 * @param index         the index of the parameter
 * @param extraSuffix   text to insert between the array number and the component selector
 * @return the suffix
 * @throws OpenMMException if index is negative or does not correspond to a stored parameter
 */
string ComputeParameterSet::getParameterSuffix(int index, const std::string& extraSuffix) const {
    static const char* const suffixes[] = {".x", ".y", ".z", ".w"};
    int buffer = -1;

    // A negative index would otherwise be matched to the first array and then used to index
    // the suffix table out of bounds, so leave buffer unset and report it as illegal below.
    if (index >= 0) {
        // Walk the arrays, reducing index by the number of values each one holds per element
        // until it falls inside one of them.
        for (int i = 0; buffer == -1 && i < (int) parameters.size(); i++) {
            if (index*elementSize < parameters[i].getSize())
                buffer = i;
            else
                index -= parameters[i].getSize()/elementSize;
        }
    }
    if (buffer == -1)
        throw OpenMMException("Internal error: Illegal argument to ComputeParameterSet::getParameterSuffix() ("+name+")");
    stringstream suffix;
    suffix << (buffer+1) << extraSuffix;
    if (parameters[buffer].getSize() != elementSize)
        suffix << suffixes[index];
    return suffix.str();
}
/**
 * Explicitly instantiate the float and double versions of getParameterValues() and
 * setParameterValues().  The template bodies are defined in this source file, so these
 * instantiations are what provide the float and double versions to other translation units.
 */
namespace OpenMM {
template void ComputeParameterSet::getParameterValues<float>(vector<vector<float> >& values);
template void ComputeParameterSet::setParameterValues<float>(const vector<vector<float> >& values);
template void ComputeParameterSet::getParameterValues<double>(vector<vector<double> >& values);
template void ComputeParameterSet::setParameterValues<double>(const vector<vector<double> >& values);
}
\ No newline at end of file
......@@ -24,7 +24,7 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaExpressionUtilities.h"
#include "openmm/common/ExpressionUtilities.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/SplineFitter.h"
#include "lepton/Operation.h"
......@@ -33,10 +33,10 @@ using namespace OpenMM;
using namespace Lepton;
using namespace std;
CudaExpressionUtilities::CudaExpressionUtilities(CudaContext& context) : context(context), fp1(1), fp2(2), fp3(3), periodicDistance(6) {
ExpressionUtilities::ExpressionUtilities(ComputeContext& context) : context(context), fp1(1), fp2(2), fp3(3), periodicDistance(6) {
}
string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
string ExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const string& tempType) {
vector<pair<ExpressionTreeNode, string> > variableNodes;
for (map<string, string>::const_iterator iter = variables.begin(); iter != variables.end(); ++iter)
......@@ -44,7 +44,7 @@ string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpres
return createExpressions(expressions, variableNodes, functions, functionNames, prefix, tempType);
}
string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const vector<pair<ExpressionTreeNode, string> >& variables,
string ExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const vector<pair<ExpressionTreeNode, string> >& variables,
const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const string& tempType) {
stringstream out;
vector<ParsedExpression> allExpressions;
......@@ -59,7 +59,7 @@ string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpres
return out.str();
}
void CudaExpressionUtilities::processExpression(stringstream& out, const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, string> >& temps,
void ExpressionUtilities::processExpression(stringstream& out, const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, string> >& temps,
const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const vector<vector<double> >& functionParams,
const vector<ParsedExpression>& allExpressions, const string& tempType) {
for (int i = 0; i < (int) temps.size(); i++)
......@@ -662,7 +662,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
temps.push_back(make_pair(node, name));
}
string CudaExpressionUtilities::getTempName(const ExpressionTreeNode& node, const vector<pair<ExpressionTreeNode, string> >& temps) {
string ExpressionUtilities::getTempName(const ExpressionTreeNode& node, const vector<pair<ExpressionTreeNode, string> >& temps) {
for (int i = 0; i < (int) temps.size(); i++)
if (temps[i].first == node)
return temps[i].second;
......@@ -671,7 +671,7 @@ string CudaExpressionUtilities::getTempName(const ExpressionTreeNode& node, cons
throw OpenMMException(out.str());
}
void CudaExpressionUtilities::findRelatedCustomFunctions(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode,
void ExpressionUtilities::findRelatedCustomFunctions(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode,
vector<const Lepton::ExpressionTreeNode*>& nodes) {
if (searchNode.getOperation().getId() == Operation::CUSTOM && node.getOperation().getName() == searchNode.getOperation().getName()) {
// Make sure the arguments are identical.
......@@ -695,7 +695,7 @@ void CudaExpressionUtilities::findRelatedCustomFunctions(const ExpressionTreeNod
findRelatedCustomFunctions(node, searchNode.getChildren()[i], nodes);
}
void CudaExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode, map<int, const ExpressionTreeNode*>& powers) {
void ExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode, map<int, const ExpressionTreeNode*>& powers) {
if (searchNode.getOperation().getId() == Operation::POWER_CONSTANT && node.getChildren()[0] == searchNode.getChildren()[0]) {
double realPower = dynamic_cast<const Operation::PowerConstant*>(&searchNode.getOperation())->getValue();
int power = (int) realPower;
......@@ -712,7 +712,7 @@ void CudaExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node,
findRelatedPowers(node, searchNode.getChildren()[i], powers);
}
vector<float> CudaExpressionUtilities::computeFunctionCoefficients(const TabulatedFunction& function, int& width) {
vector<float> ExpressionUtilities::computeFunctionCoefficients(const TabulatedFunction& function, int& width) {
if (dynamic_cast<const Continuous1DFunction*>(&function) != NULL) {
// Compute the spline coefficients.
......@@ -827,7 +827,7 @@ vector<float> CudaExpressionUtilities::computeFunctionCoefficients(const Tabulat
throw OpenMMException("computeFunctionCoefficients: Unknown function type");
}
vector<vector<double> > CudaExpressionUtilities::computeFunctionParameters(const vector<const TabulatedFunction*>& functions) {
vector<vector<double> > ExpressionUtilities::computeFunctionParameters(const vector<const TabulatedFunction*>& functions) {
vector<vector<double> > params(functions.size());
for (int i = 0; i < (int) functions.size(); i++) {
if (dynamic_cast<const Continuous1DFunction*>(functions[i]) != NULL) {
......@@ -903,7 +903,7 @@ vector<vector<double> > CudaExpressionUtilities::computeFunctionParameters(const
return params;
}
Lepton::CustomFunction* CudaExpressionUtilities::getFunctionPlaceholder(const TabulatedFunction& function) {
Lepton::CustomFunction* ExpressionUtilities::getFunctionPlaceholder(const TabulatedFunction& function) {
if (dynamic_cast<const Continuous1DFunction*>(&function) != NULL)
return &fp1;
if (dynamic_cast<const Continuous2DFunction*>(&function) != NULL)
......@@ -919,11 +919,11 @@ Lepton::CustomFunction* CudaExpressionUtilities::getFunctionPlaceholder(const Ta
throw OpenMMException("getFunctionPlaceholder: Unknown function type");
}
Lepton::CustomFunction* CudaExpressionUtilities::getPeriodicDistancePlaceholder() {
Lepton::CustomFunction* ExpressionUtilities::getPeriodicDistancePlaceholder() {
return &periodicDistance;
}
void CudaExpressionUtilities::callFunction(stringstream& out, string singleFn, string doubleFn, const string& arg, const string& tempType) {
void ExpressionUtilities::callFunction(stringstream& out, string singleFn, string doubleFn, const string& arg, const string& tempType) {
bool isDouble = (tempType[0] == 'd');
bool isVector = (tempType[tempType.size()-1] == '3');
string fn = (isDouble ? doubleFn : singleFn);
......@@ -933,7 +933,7 @@ void CudaExpressionUtilities::callFunction(stringstream& out, string singleFn, s
out<<fn<<"("<<arg<<")";
}
void CudaExpressionUtilities::callFunction2(stringstream& out, string singleFn, string doubleFn, const string& arg1, const string& arg2, const string& tempType) {
void ExpressionUtilities::callFunction2(stringstream& out, string singleFn, string doubleFn, const string& arg1, const string& arg2, const string& tempType) {
bool isDouble = (tempType[0] == 'd');
bool isVector = (tempType[tempType.size()-1] == '3');
string fn = (isDouble ? doubleFn : singleFn);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/IntegrationUtilities.h"
#include "openmm/common/ComputeContext.h"
#include "CommonKernelSources.h"
#include "openmm/internal/OSRngSeed.h"
#include "openmm/HarmonicAngleForce.h"
#include "openmm/VirtualSite.h"
#include "quern.h"
#include "ReferenceCCMAAlgorithm.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <map>
using namespace OpenMM;
using namespace std;
/**
 * A candidate cluster for SHAKE: one central atom bonded by constraints to at most three
 * peripheral atoms, all at the same distance and with the same inverse mass.  A cluster
 * that violates those conditions is flagged as invalid.
 */
struct IntegrationUtilities::ShakeCluster {
    int centralID;          // index of the central atom (-1 until assigned)
    int peripheralID[3];    // indices of the peripheral atoms; only the first `size` are meaningful
    int size;               // number of peripheral atoms recorded so far
    bool valid;             // false once the cluster is found to be unsuitable
    double distance;        // constraint distance of the most recently added peripheral atom
    double centralInvMass, peripheralInvMass;

    // Every member is initialized so that copying a freshly created cluster (as happens when
    // one is inserted into a map and then assigned) never reads indeterminate values.
    ShakeCluster() : centralID(-1), size(0), valid(true), distance(0.0), centralInvMass(0.0), peripheralInvMass(0.0) {
        peripheralID[0] = peripheralID[1] = peripheralID[2] = -1;
    }
    ShakeCluster(int centralID, double invMass) : centralID(centralID), size(0), valid(true), distance(0.0), centralInvMass(invMass), peripheralInvMass(0.0) {
        peripheralID[0] = peripheralID[1] = peripheralID[2] = -1;
    }

    /**
     * Add a peripheral atom.  The cluster is marked invalid, and the atom is not recorded,
     * if it already has three peripheral atoms or if the new distance or inverse mass
     * differs from the previously recorded one by a relative amount greater than 1e-8.
     */
    void addAtom(int id, double dist, double invMass) {
        if (size == 3 || (size > 0 && std::fabs(dist-distance)/distance > 1e-8) || (size > 0 && std::fabs(invMass-peripheralInvMass)/peripheralInvMass > 1e-8))
            valid = false;
        else {
            peripheralID[size++] = id;
            distance = dist;
            peripheralInvMass = invMass;
        }
    }

    /**
     * Mark this cluster and all of its atoms as unusable for SHAKE, then do the same,
     * recursively, for any still-valid cluster whose central atom is one of this
     * cluster's peripheral atoms.
     */
    void markInvalid(map<int, ShakeCluster>& allClusters, vector<bool>& invalidForShake)
    {
        valid = false;
        invalidForShake[centralID] = true;
        for (int i = 0; i < size; i++) {
            invalidForShake[peripheralID[i]] = true;
            map<int, ShakeCluster>::iterator otherCluster = allClusters.find(peripheralID[i]);
            if (otherCluster != allClusters.end() && otherCluster->second.valid)
                otherCluster->second.markInvalid(allClusters, invalidForShake);
        }
    }
};
struct IntegrationUtilities::ConstraintOrderer : public binary_function<int, int, bool> {
const vector<int>& atom1;
const vector<int>& atom2;
const vector<int>& constraints;
ConstraintOrderer(const vector<int>& atom1, const vector<int>& atom2, const vector<int>& constraints) : atom1(atom1), atom2(atom2), constraints(constraints) {
}
bool operator()(int x, int y) {
int ix = constraints[x];
int iy = constraints[y];
if (atom1[ix] != atom1[iy])
return atom1[ix] < atom1[iy];
return atom2[ix] < atom2[iy];
}
};
IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System& system) : context(context),
randomPos(0), hasOverlappingVsites(false) {
// Create workspace arrays.
lastStepSize = mm_double2(0.0, 0.0);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
posDelta.initialize<mm_double4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<mm_double4> deltas(posDelta.getSize(), mm_double4(0.0, 0.0, 0.0, 0.0));
posDelta.upload(deltas);
stepSize.initialize<mm_double2>(context, 1, "stepSize");
stepSize.upload(&lastStepSize);
}
else {
posDelta.initialize<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<mm_float4> deltas(posDelta.getSize(), mm_float4(0.0f, 0.0f, 0.0f, 0.0f));
posDelta.upload(deltas);
stepSize.initialize<mm_float2>(context, 1, "stepSize");
mm_float2 lastStepSizeFloat = mm_float2(0.0f, 0.0f);
stepSize.upload(&lastStepSizeFloat);
}
// Record the set of constraints and how many constraints each atom is involved in.
vector<int> atom1;
vector<int> atom2;
vector<double> distance;
vector<int> constraintCount(context.getNumAtoms(), 0);
for (int i = 0; i < system.getNumConstraints(); i++) {
int p1, p2;
double d;
system.getConstraintParameters(i, p1, p2, d);
if (system.getParticleMass(p1) != 0 || system.getParticleMass(p2) != 0) {
atom1.push_back(p1);
atom2.push_back(p2);
distance.push_back(d);
constraintCount[p1]++;
constraintCount[p2]++;
}
}
// Identify clusters of three atoms that can be treated with SETTLE. First, for every
// atom that might be part of such a cluster, make a list of the two other atoms it is
// connected to.
int numAtoms = system.getNumParticles();
vector<map<int, float> > settleConstraints(numAtoms);
for (int i = 0; i < (int)atom1.size(); i++) {
if (constraintCount[atom1[i]] == 2 && constraintCount[atom2[i]] == 2) {
settleConstraints[atom1[i]][atom2[i]] = (float) distance[i];
settleConstraints[atom2[i]][atom1[i]] = (float) distance[i];
}
}
// Now remove the ones that don't actually form closed loops of three atoms.
vector<int> settleClusters;
for (int i = 0; i < (int)settleConstraints.size(); i++) {
if (settleConstraints[i].size() == 2) {
int partner1 = settleConstraints[i].begin()->first;
int partner2 = (++settleConstraints[i].begin())->first;
if (settleConstraints[partner1].size() != 2 || settleConstraints[partner2].size() != 2 ||
settleConstraints[partner1].find(partner2) == settleConstraints[partner1].end())
settleConstraints[i].clear();
else if (i < partner1 && i < partner2)
settleClusters.push_back(i);
}
else
settleConstraints[i].clear();
}
// Record the SETTLE clusters.
vector<bool> isShakeAtom(numAtoms, false);
if (settleClusters.size() > 0) {
vector<mm_int4> atoms;
vector<mm_float2> params;
for (int i = 0; i < (int) settleClusters.size(); i++) {
int atom1 = settleClusters[i];
int atom2 = settleConstraints[atom1].begin()->first;
int atom3 = (++settleConstraints[atom1].begin())->first;
float dist12 = settleConstraints[atom1].find(atom2)->second;
float dist13 = settleConstraints[atom1].find(atom3)->second;
float dist23 = settleConstraints[atom2].find(atom3)->second;
if (dist12 == dist13) {
// atom1 is the central atom
atoms.push_back(mm_int4(atom1, atom2, atom3, 0));
params.push_back(mm_float2(dist12, dist23));
}
else if (dist12 == dist23) {
// atom2 is the central atom
atoms.push_back(mm_int4(atom2, atom1, atom3, 0));
params.push_back(mm_float2(dist12, dist13));
}
else if (dist13 == dist23) {
// atom3 is the central atom
atoms.push_back(mm_int4(atom3, atom1, atom2, 0));
params.push_back(mm_float2(dist13, dist12));
}
else
continue; // We can't handle this with SETTLE
isShakeAtom[atom1] = true;
isShakeAtom[atom2] = true;
isShakeAtom[atom3] = true;
}
if (atoms.size() > 0) {
settleAtoms.initialize<mm_int4>(context, atoms.size(), "settleAtoms");
settleParams.initialize<mm_float2>(context, params.size(), "settleParams");
settleAtoms.upload(atoms);
settleParams.upload(params);
}
}
// Find clusters consisting of a central atom with up to three peripheral atoms.
map<int, ShakeCluster> clusters;
vector<bool> invalidForShake(numAtoms, false);
for (int i = 0; i < (int) atom1.size(); i++) {
if (isShakeAtom[atom1[i]])
continue; // This is being taken care of with SETTLE.
// Determine which is the central atom.
bool firstIsCentral;
if (constraintCount[atom1[i]] > 1)
firstIsCentral = true;
else if (constraintCount[atom2[i]] > 1)
firstIsCentral = false;
else if (atom1[i] < atom2[i])
firstIsCentral = true;
else
firstIsCentral = false;
int centralID, peripheralID;
if (firstIsCentral) {
centralID = atom1[i];
peripheralID = atom2[i];
}
else {
centralID = atom2[i];
peripheralID = atom1[i];
}
// Add it to the cluster.
if (clusters.find(centralID) == clusters.end()) {
clusters[centralID] = ShakeCluster(centralID, 1.0/system.getParticleMass(centralID));
}
ShakeCluster& cluster = clusters[centralID];
cluster.addAtom(peripheralID, distance[i], 1.0/system.getParticleMass(peripheralID));
if (constraintCount[peripheralID] != 1 || invalidForShake[atom1[i]] || invalidForShake[atom2[i]]) {
cluster.markInvalid(clusters, invalidForShake);
map<int, ShakeCluster>::iterator otherCluster = clusters.find(peripheralID);
if (otherCluster != clusters.end() && otherCluster->second.valid)
otherCluster->second.markInvalid(clusters, invalidForShake);
}
}
int validShakeClusters = 0;
for (map<int, ShakeCluster>::iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
ShakeCluster& cluster = iter->second;
if (cluster.valid) {
cluster.valid = !invalidForShake[cluster.centralID] && cluster.size == constraintCount[cluster.centralID];
for (int i = 0; i < cluster.size; i++)
if (invalidForShake[cluster.peripheralID[i]])
cluster.valid = false;
if (cluster.valid)
++validShakeClusters;
}
}
// Record the SHAKE clusters.
if (validShakeClusters > 0) {
vector<mm_int4> atoms;
vector<mm_float4> params;
int index = 0;
for (map<int, ShakeCluster>::const_iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
const ShakeCluster& cluster = iter->second;
if (!cluster.valid)
continue;
atoms.push_back(mm_int4(cluster.centralID, cluster.peripheralID[0], (cluster.size > 1 ? cluster.peripheralID[1] : -1), (cluster.size > 2 ? cluster.peripheralID[2] : -1)));
params.push_back(mm_float4((float) cluster.centralInvMass, (float) (0.5/(cluster.centralInvMass+cluster.peripheralInvMass)), (float) (cluster.distance*cluster.distance), (float) cluster.peripheralInvMass));
isShakeAtom[cluster.centralID] = true;
isShakeAtom[cluster.peripheralID[0]] = true;
if (cluster.size > 1)
isShakeAtom[cluster.peripheralID[1]] = true;
if (cluster.size > 2)
isShakeAtom[cluster.peripheralID[2]] = true;
++index;
}
shakeAtoms.initialize<mm_int4>(context, atoms.size(), "shakeAtoms");
shakeParams.initialize<mm_float4>(context, params.size(), "shakeParams");
shakeAtoms.upload(atoms);
shakeParams.upload(params);
}
// Find connected constraints for CCMA.
vector<int> ccmaConstraints;
for (unsigned i = 0; i < atom1.size(); i++)
if (!isShakeAtom[atom1[i]])
ccmaConstraints.push_back(i);
// Record the connections between constraints.
int numCCMA = (int) ccmaConstraints.size();
if (numCCMA > 0) {
// Record information needed by ReferenceCCMAAlgorithm.
vector<pair<int, int> > refIndices(numCCMA);
vector<double> refDistance(numCCMA);
for (int i = 0; i < numCCMA; i++) {
int index = ccmaConstraints[i];
refIndices[i] = make_pair(atom1[index], atom2[index]);
refDistance[i] = distance[index];
}
vector<double> refMasses(numAtoms);
for (int i = 0; i < numAtoms; ++i)
refMasses[i] = system.getParticleMass(i);
// Look up angles for CCMA.
vector<ReferenceCCMAAlgorithm::AngleInfo> angles;
for (int i = 0; i < system.getNumForces(); i++) {
const HarmonicAngleForce* force = dynamic_cast<const HarmonicAngleForce*>(&system.getForce(i));
if (force != NULL) {
for (int j = 0; j < force->getNumAngles(); j++) {
int atom1, atom2, atom3;
double angle, k;
force->getAngleParameters(j, atom1, atom2, atom3, angle, k);
angles.push_back(ReferenceCCMAAlgorithm::AngleInfo(atom1, atom2, atom3, angle));
}
}
}
// Create a ReferenceCCMAAlgorithm. It will build and invert the constraint matrix for us.
ReferenceCCMAAlgorithm ccma(numAtoms, numCCMA, refIndices, refDistance, refMasses, angles, 0.1);
vector<vector<pair<int, double> > > matrix = ccma.getMatrix();
int maxRowElements = 0;
for (unsigned i = 0; i < matrix.size(); i++)
maxRowElements = max(maxRowElements, (int) matrix[i].size());
maxRowElements++;
// Build the list of constraints for each atom.
vector<vector<int> > atomConstraints(context.getNumAtoms());
for (int i = 0; i < numCCMA; i++) {
atomConstraints[atom1[ccmaConstraints[i]]].push_back(i);
atomConstraints[atom2[ccmaConstraints[i]]].push_back(i);
}
int maxAtomConstraints = 0;
for (unsigned i = 0; i < atomConstraints.size(); i++)
maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size());
// Sort the constraints.
vector<int> constraintOrder(numCCMA);
for (int i = 0; i < numCCMA; ++i)
constraintOrder[i] = i;
sort(constraintOrder.begin(), constraintOrder.end(), ConstraintOrderer(atom1, atom2, ccmaConstraints));
vector<int> inverseOrder(numCCMA);
for (int i = 0; i < numCCMA; ++i)
inverseOrder[constraintOrder[i]] = i;
for (int i = 0; i < (int)matrix.size(); ++i)
for (int j = 0; j < (int)matrix[i].size(); ++j)
matrix[i][j].first = inverseOrder[matrix[i][j].first];
// Record the CCMA data structures.
ccmaAtoms.initialize<mm_int2>(context, numCCMA, "CcmaAtoms");
ccmaAtomConstraints.initialize<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints.initialize<int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaConstraintMatrixColumn.initialize<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConverged.initialize<int>(context, 2, "ccmaConverged");
vector<mm_int2> atomsVec(ccmaAtoms.getSize());
vector<int> atomConstraintsVec(ccmaAtomConstraints.getSize());
vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
int elementSize = (context.getUseDoublePrecision() || context.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
ccmaDistance.initialize(context, numCCMA, 4*elementSize, "CcmaDistance");
ccmaDelta1.initialize(context, numCCMA, elementSize, "CcmaDelta1");
ccmaDelta2.initialize(context, numCCMA, elementSize, "CcmaDelta2");
ccmaReducedMass.initialize(context, numCCMA, elementSize, "CcmaReducedMass");
ccmaConstraintMatrixValue.initialize(context, numCCMA*maxRowElements, elementSize, "ConstraintMatrixValue");
vector<mm_double4> distanceVec(ccmaDistance.getSize());
vector<double> reducedMassVec(ccmaReducedMass.getSize());
vector<double> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
for (int i = 0; i < numCCMA; i++) {
int index = constraintOrder[i];
int c = ccmaConstraints[index];
atomsVec[i].x = atom1[c];
atomsVec[i].y = atom2[c];
distanceVec[i].w = distance[c];
reducedMassVec[i] = (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
for (unsigned int j = 0; j < matrix[index].size(); j++) {
constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
constraintMatrixValueVec[i+j*numCCMA] = matrix[index][j].second;
}
constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
}
ccmaDistance.upload(distanceVec, true);
ccmaReducedMass.upload(reducedMassVec, true);
ccmaConstraintMatrixValue.upload(constraintMatrixValueVec, true);
for (unsigned int i = 0; i < atomConstraints.size(); i++) {
numAtomConstraintsVec[i] = atomConstraints[i].size();
for (unsigned int j = 0; j < atomConstraints[i].size(); j++) {
bool forward = (atom1[ccmaConstraints[atomConstraints[i][j]]] == i);
atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
}
}
ccmaAtoms.upload(atomsVec);
ccmaAtomConstraints.upload(atomConstraintsVec);
ccmaNumAtomConstraints.upload(numAtomConstraintsVec);
ccmaConstraintMatrixColumn.upload(constraintMatrixColumnVec);
}
// Build the list of virtual sites.
vector<mm_int4> vsite2AvgAtomVec;
vector<mm_double2> vsite2AvgWeightVec;
vector<mm_int4> vsite3AvgAtomVec;
vector<mm_double4> vsite3AvgWeightVec;
vector<mm_int4> vsiteOutOfPlaneAtomVec;
vector<mm_double4> vsiteOutOfPlaneWeightVec;
vector<int> vsiteLocalCoordsIndexVec;
vector<int> vsiteLocalCoordsAtomVec;
vector<int> vsiteLocalCoordsStartVec;
vector<double> vsiteLocalCoordsWeightVec;
vector<mm_double4> vsiteLocalCoordsPosVec;
for (int i = 0; i < numAtoms; i++) {
if (system.isVirtualSite(i)) {
if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
// A two particle average.
const TwoParticleAverageSite& site = dynamic_cast<const TwoParticleAverageSite&>(system.getVirtualSite(i));
vsite2AvgAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), 0));
vsite2AvgWeightVec.push_back(mm_double2(site.getWeight(0), site.getWeight(1)));
}
else if (dynamic_cast<const ThreeParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
// A three particle average.
const ThreeParticleAverageSite& site = dynamic_cast<const ThreeParticleAverageSite&>(system.getVirtualSite(i));
vsite3AvgAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
vsite3AvgWeightVec.push_back(mm_double4(site.getWeight(0), site.getWeight(1), site.getWeight(2), 0.0));
}
else if (dynamic_cast<const OutOfPlaneSite*>(&system.getVirtualSite(i)) != NULL) {
// An out of plane site.
const OutOfPlaneSite& site = dynamic_cast<const OutOfPlaneSite&>(system.getVirtualSite(i));
vsiteOutOfPlaneAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
vsiteOutOfPlaneWeightVec.push_back(mm_double4(site.getWeight12(), site.getWeight13(), site.getWeightCross(), 0.0));
}
else if (dynamic_cast<const LocalCoordinatesSite*>(&system.getVirtualSite(i)) != NULL) {
// A local coordinates site.
const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i));
int numParticles = site.getNumParticles();
vector<double> origin, x, y;
site.getOriginWeights(origin);
site.getXWeights(x);
site.getYWeights(y);
vsiteLocalCoordsIndexVec.push_back(i);
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
for (int j = 0; j < numParticles; j++) {
vsiteLocalCoordsAtomVec.push_back(site.getParticle(j));
vsiteLocalCoordsWeightVec.push_back(origin[j]);
vsiteLocalCoordsWeightVec.push_back(x[j]);
vsiteLocalCoordsWeightVec.push_back(y[j]);
}
Vec3 pos = site.getLocalPosition();
vsiteLocalCoordsPosVec.push_back(mm_double4(pos[0], pos[1], pos[2], 0.0));
}
}
}
vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
int num2Avg = vsite2AvgAtomVec.size();
int num3Avg = vsite3AvgAtomVec.size();
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
int numLocalCoords = vsiteLocalCoordsPosVec.size();
numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords;
vsite2AvgAtoms.initialize<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite3AvgAtoms.initialize<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsiteOutOfPlaneAtoms.initialize<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteLocalCoordsIndex.initialize<int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex");
vsiteLocalCoordsAtoms.initialize<int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms");
vsiteLocalCoordsStartIndex.initialize<int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex");
if (num2Avg > 0)
vsite2AvgAtoms.upload(vsite2AvgAtomVec);
if (num3Avg > 0)
vsite3AvgAtoms.upload(vsite3AvgAtomVec);
if (numOutOfPlane > 0)
vsiteOutOfPlaneAtoms.upload(vsiteOutOfPlaneAtomVec);
if (numLocalCoords > 0) {
vsiteLocalCoordsIndex.upload(vsiteLocalCoordsIndexVec);
vsiteLocalCoordsAtoms.upload(vsiteLocalCoordsAtomVec);
vsiteLocalCoordsStartIndex.upload(vsiteLocalCoordsStartVec);
}
int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
vsite2AvgWeights.initialize(context, max(1, num2Avg), 2*elementSize, "vsite2AvgWeights");
vsite3AvgWeights.initialize(context, max(1, num3Avg), 4*elementSize, "vsite3AvgWeights");
vsiteOutOfPlaneWeights.initialize(context, max(1, numOutOfPlane), 4*elementSize, "vsiteOutOfPlaneWeights");
vsiteLocalCoordsWeights.initialize(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), elementSize, "vsiteLocalCoordsWeights");
vsiteLocalCoordsPos.initialize(context, max(1, (int) vsiteLocalCoordsPosVec.size()), 4*elementSize, "vsiteLocalCoordsPos");
if (num2Avg > 0)
vsite2AvgWeights.upload(vsite2AvgWeightVec, true);
if (num3Avg > 0)
vsite3AvgWeights.upload(vsite3AvgWeightVec, true);
if (numOutOfPlane > 0)
vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec, true);
if (numLocalCoords > 0) {
vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec, true);
vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec, true);
}
// If multiple virtual sites depend on the same particle, make sure the force distribution
// can be done safely.
vector<int> atomCounts(numAtoms, 0);
for (int i = 0; i < numAtoms; i++)
if (system.isVirtualSite(i))
for (int j = 0; j < system.getVirtualSite(i).getNumParticles(); j++)
atomCounts[system.getVirtualSite(i).getParticle(j)]++;
for (int i = 0; i < numAtoms; i++)
if (atomCounts[i] > 1)
hasOverlappingVsites = true;
if (hasOverlappingVsites && !context.getSupports64BitGlobalAtomics())
throw OpenMMException("This device does not support 64 bit atomics. Cannot have multiple virtual sites that depend on the same atom.");
// Create the kernels used by this class.
map<string, string> defines;
defines["NUM_CCMA_CONSTRAINTS"] = context.intToString(numCCMA);
defines["NUM_ATOMS"] = context.intToString(numAtoms);
defines["NUM_2_AVERAGE"] = context.intToString(num2Avg);
defines["NUM_3_AVERAGE"] = context.intToString(num3Avg);
defines["NUM_OUT_OF_PLANE"] = context.intToString(numOutOfPlane);
defines["NUM_LOCAL_COORDS"] = context.intToString(numLocalCoords);
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
if (hasOverlappingVsites)
defines["HAS_OVERLAPPING_VSITES"] = "1";
ComputeProgram program = context.compileProgram(CommonKernelSources::integrationUtilities, defines);
settlePosKernel = program->createKernel("applySettleToPositions");
settleVelKernel = program->createKernel("applySettleToVelocities");
shakePosKernel = program->createKernel("applyShakeToPositions");
shakeVelKernel = program->createKernel("applyShakeToVelocities");
ccmaDirectionsKernel = program->createKernel("computeCCMAConstraintDirections");
ccmaPosForceKernel = program->createKernel("computeCCMAPositionConstraintForce");
ccmaVelForceKernel = program->createKernel("computeCCMAVelocityConstraintForce");
ccmaMultiplyKernel = program->createKernel("multiplyByCCMAConstraintMatrix");
ccmaUpdateKernel = program->createKernel("updateCCMAAtomPositions");
vsitePositionKernel = program->createKernel("computeVirtualSites");
vsiteForceKernel = program->createKernel("distributeVirtualSiteForces");
vsiteSaveForcesKernel = program->createKernel("saveDistributedForces");
randomKernel = program->createKernel("generateRandomNumbers");
timeShiftKernel = program->createKernel("timeShiftVelocities");
// Set arguments for virtual site kernels.
vsitePositionKernel->addArg(context.getPosq());
if (context.getUseMixedPrecision())
vsitePositionKernel->addArg(context.getPosqCorrection());
else
vsitePositionKernel->addArg(NULL);
vsitePositionKernel->addArg(vsite2AvgAtoms);
vsitePositionKernel->addArg(vsite2AvgWeights);
vsitePositionKernel->addArg(vsite3AvgAtoms);
vsitePositionKernel->addArg(vsite3AvgWeights);
vsitePositionKernel->addArg(vsiteOutOfPlaneAtoms);
vsitePositionKernel->addArg(vsiteOutOfPlaneWeights);
vsitePositionKernel->addArg(vsiteLocalCoordsIndex);
vsitePositionKernel->addArg(vsiteLocalCoordsAtoms);
vsitePositionKernel->addArg(vsiteLocalCoordsWeights);
vsitePositionKernel->addArg(vsiteLocalCoordsPos);
vsitePositionKernel->addArg(vsiteLocalCoordsStartIndex);
vsiteForceKernel->addArg(context.getPosq());
if (context.getUseMixedPrecision())
vsiteForceKernel->addArg(context.getPosqCorrection());
else
vsiteForceKernel->addArg(NULL);
vsiteForceKernel->addArg(); // Skip argument 2: the force array hasn't been created yet.
vsiteForceKernel->addArg(vsite2AvgAtoms);
vsiteForceKernel->addArg(vsite2AvgWeights);
vsiteForceKernel->addArg(vsite3AvgAtoms);
vsiteForceKernel->addArg(vsite3AvgWeights);
vsiteForceKernel->addArg(vsiteOutOfPlaneAtoms);
vsiteForceKernel->addArg(vsiteOutOfPlaneWeights);
vsiteForceKernel->addArg(vsiteLocalCoordsIndex);
vsiteForceKernel->addArg(vsiteLocalCoordsAtoms);
vsiteForceKernel->addArg(vsiteLocalCoordsWeights);
vsiteForceKernel->addArg(vsiteLocalCoordsPos);
vsiteForceKernel->addArg(vsiteLocalCoordsStartIndex);
for (int i = 0; i < 3; i++)
vsiteSaveForcesKernel->addArg();
// Set arguments for constraint kernels.
if (settleAtoms.isInitialized()) {
settlePosKernel->addArg(settleAtoms.getSize());
settlePosKernel->addArg();
settlePosKernel->addArg(context.getPosq());
settlePosKernel->addArg(posDelta);
settlePosKernel->addArg(context.getVelm());
settlePosKernel->addArg(settleAtoms);
settlePosKernel->addArg(settleParams);
if (context.getUseMixedPrecision())
settlePosKernel->addArg(context.getPosqCorrection());
settleVelKernel->addArg(settleAtoms.getSize());
settleVelKernel->addArg();
settleVelKernel->addArg(context.getPosq());
settleVelKernel->addArg(posDelta);
settleVelKernel->addArg(context.getVelm());
settleVelKernel->addArg(settleAtoms);
settleVelKernel->addArg(settleParams);
if (context.getUseMixedPrecision())
settleVelKernel->addArg(context.getPosqCorrection());
}
if (shakeAtoms.isInitialized()) {
shakePosKernel->addArg(shakeAtoms.getSize());
shakePosKernel->addArg();
shakePosKernel->addArg(context.getPosq());
shakePosKernel->addArg(posDelta);
shakePosKernel->addArg(shakeAtoms);
shakePosKernel->addArg(shakeParams);
if (context.getUseMixedPrecision())
shakePosKernel->addArg(context.getPosqCorrection());
shakeVelKernel->addArg(shakeAtoms.getSize());
shakeVelKernel->addArg();
shakeVelKernel->addArg(context.getPosq());
shakeVelKernel->addArg(context.getVelm());
shakeVelKernel->addArg(shakeAtoms);
shakeVelKernel->addArg(shakeParams);
if (context.getUseMixedPrecision())
shakeVelKernel->addArg(context.getPosqCorrection());
}
if (ccmaAtoms.isInitialized()) {
ccmaDirectionsKernel->addArg(ccmaAtoms);
ccmaDirectionsKernel->addArg(ccmaDistance);
ccmaDirectionsKernel->addArg(context.getPosq());
ccmaDirectionsKernel->addArg(ccmaConverged);
if (context.getUseMixedPrecision())
ccmaDirectionsKernel->addArg(context.getPosqCorrection());
ccmaPosForceKernel->addArg(ccmaAtoms);
ccmaPosForceKernel->addArg(ccmaDistance);
ccmaPosForceKernel->addArg(posDelta);
ccmaPosForceKernel->addArg(ccmaReducedMass);
ccmaPosForceKernel->addArg(ccmaDelta1);
ccmaPosForceKernel->addArg(ccmaConverged);
ccmaPosForceKernel->addArg();
ccmaPosForceKernel->addArg();
ccmaPosForceKernel->addArg();
ccmaVelForceKernel->addArg(ccmaAtoms);
ccmaVelForceKernel->addArg(ccmaDistance);
ccmaVelForceKernel->addArg(context.getVelm());
ccmaVelForceKernel->addArg(ccmaReducedMass);
ccmaVelForceKernel->addArg(ccmaDelta1);
ccmaVelForceKernel->addArg(ccmaConverged);
ccmaVelForceKernel->addArg();
ccmaVelForceKernel->addArg();
ccmaVelForceKernel->addArg();
ccmaMultiplyKernel->addArg(ccmaDelta1);
ccmaMultiplyKernel->addArg(ccmaDelta2);
ccmaMultiplyKernel->addArg(ccmaConstraintMatrixColumn);
ccmaMultiplyKernel->addArg(ccmaConstraintMatrixValue);
ccmaMultiplyKernel->addArg(ccmaConverged);
ccmaMultiplyKernel->addArg();
ccmaUpdateKernel->addArg(ccmaNumAtomConstraints);
ccmaUpdateKernel->addArg(ccmaAtomConstraints);
ccmaUpdateKernel->addArg(ccmaDistance);
ccmaUpdateKernel->addArg();
ccmaUpdateKernel->addArg(context.getVelm());
ccmaUpdateKernel->addArg(ccmaDelta1);
ccmaUpdateKernel->addArg(ccmaDelta2);
ccmaUpdateKernel->addArg(ccmaConverged);
ccmaUpdateKernel->addArg();
}
// Arguments for time shift kernel will be set later.
for (int i = 0; i < 3; i++)
timeShiftKernel->addArg();
}
void IntegrationUtilities::setNextStepSize(double size) {
if (size != lastStepSize.x || size != lastStepSize.y) {
lastStepSize = mm_double2(size, size);
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
stepSize.upload(&lastStepSize);
else {
mm_float2 lastStepSizeFloat = mm_float2((float) size, (float) size);
stepSize.upload(&lastStepSizeFloat);
}
}
}
double IntegrationUtilities::getLastStepSize() {
if (context.getUseDoublePrecision() || context.getUseMixedPrecision())
stepSize.download(&lastStepSize);
else {
mm_float2 lastStepSizeFloat;
stepSize.download(&lastStepSizeFloat);
lastStepSize = mm_double2(lastStepSizeFloat.x, lastStepSizeFloat.y);
}
return lastStepSize.y;
}
/**
 * Apply constraints by forwarding to applyConstraintsImpl() with its first
 * (boolean) argument set to false.
 *
 * @param tol  the tolerance passed through to applyConstraintsImpl()
 */
void IntegrationUtilities::applyConstraints(double tol) {
    const bool constrainVelocities = false;
    applyConstraintsImpl(constrainVelocities, tol);
}
/**
 * Apply velocity constraints by forwarding to applyConstraintsImpl() with
 * its first (boolean) argument set to true.
 *
 * @param tol  the tolerance passed through to applyConstraintsImpl()
 */
void IntegrationUtilities::applyVelocityConstraints(double tol) {
    const bool constrainVelocities = true;
    applyConstraintsImpl(constrainVelocities, tol);
}
/**
 * Execute the virtual site position kernel over numVsites work items.
 * Does nothing when there are no virtual sites.
 */
void IntegrationUtilities::computeVirtualSites() {
    if (numVsites <= 0)
        return; // No virtual sites, so there is nothing to execute.
    vsitePositionKernel->execute(numVsites);
}
void IntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) {
if (random.isInitialized()) {
if (randomNumberSeed != lastSeed)
throw OpenMMException("IntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed");
return;
}
// Create the random number arrays.
lastSeed = randomNumberSeed;
random.initialize<mm_float4>(context, 4*context.getPaddedNumAtoms(), "random");
randomSeed.initialize<mm_int4>(context, context.getNumThreadBlocks()*64, "randomSeed");
randomPos = random.getSize();
randomKernel->addArg(random.getSize());
randomKernel->addArg(random);
randomKernel->addArg(randomSeed);
// Use a quick and dirty RNG to pick seeds for the real random number generator.
vector<mm_int4> seed(randomSeed.getSize());
unsigned int r = randomNumberSeed;
if (r == 0)
r = (unsigned int) osrngseed(); // A seed of 0 means use a unique one
for (int i = 0; i < randomSeed.getSize(); i++) {
seed[i].x = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].y = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].z = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
seed[i].w = r = (1664525*r + 1013904223) & 0xFFFFFFFF;
}
randomSeed.upload(seed);
}
/**
 * Reserve a run of values in the "random" array and return where it starts.
 *
 * If enough unused values remain after randomPos, they are handed out
 * directly.  Otherwise the array is grown if it is too small for the request,
 * randomKernel is executed, and the run starts at index 0.
 *
 * @param numValues  the number of values the caller needs
 * @return the index of the first reserved value in the random array
 */
int IntegrationUtilities::prepareRandomNumbers(int numValues) {
    if (randomPos+numValues > random.getSize()) {
        // Not enough unused values are left.
        if (numValues > random.getSize()) {
            // The request exceeds the whole array: enlarge it and update the
            // size argument of the kernel.
            random.resize(numValues);
            randomKernel->setArg(0, numValues);
        }
        randomKernel->execute(random.getSize(), 64);
        randomPos = numValues;
        return 0;
    }
    // Hand out the next unused run.
    int firstIndex = randomPos;
    randomPos += numValues;
    return firstIndex;
}
/**
 * Write this object's state to a checkpoint stream.
 *
 * Layout: the number of Nose-Hoover chains, then for each chain its ID, its
 * length and its state (mm_double2 elements in double or mixed precision,
 * mm_float2 otherwise).  If the random number generator has been initialized,
 * this is followed by randomPos, the contents of the random array and the
 * contents of the randomSeed array.
 *
 * @param stream  the stream to write the raw binary data to
 */
void IntegrationUtilities::createCheckpoint(ostream& stream) {
    int numChains = noseHooverChainState.size();
    const bool useDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
    stream.write((char*) &numChains, sizeof(int));
    for (auto& entry : noseHooverChainState) {
        int chainID = entry.first;
        int chainLength = entry.second.getSize();
        stream.write((char*) &chainID, sizeof(int));
        stream.write((char*) &chainLength, sizeof(int));
        if (!useDouble) {
            vector<mm_float2> values;
            entry.second.download(values);
            stream.write((char*) values.data(), sizeof(mm_float2)*chainLength);
        }
        else {
            vector<mm_double2> values;
            entry.second.download(values);
            stream.write((char*) values.data(), sizeof(mm_double2)*chainLength);
        }
    }

    // The random number state is only stored once the generator exists.
    if (random.isInitialized()) {
        stream.write((char*) &randomPos, sizeof(int));
        vector<mm_float4> randomValues;
        random.download(randomValues);
        stream.write((char*) randomValues.data(), sizeof(mm_float4)*random.getSize());
        vector<mm_int4> seedValues;
        randomSeed.download(seedValues);
        stream.write((char*) seedValues.data(), sizeof(mm_int4)*randomSeed.getSize());
    }
}
/**
 * Restore this object's state from a checkpoint stream written by
 * createCheckpoint().
 *
 * All existing Nose-Hoover chain states are discarded and recreated from the
 * stream.  If the random number generator has been initialized, randomPos and
 * the contents of the random and randomSeed arrays are read and uploaded as
 * well.
 *
 * @param stream  the stream to read the raw binary data from
 */
void IntegrationUtilities::loadCheckpoint(istream& stream) {
    int numChains;
    const bool useDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
    stream.read((char*) &numChains, sizeof(int));
    noseHooverChainState.clear();
    for (int chain = 0; chain < numChains; ++chain) {
        int chainID, chainLength;
        stream.read((char*) &chainID, sizeof(int));
        stream.read((char*) &chainLength, sizeof(int));

        // Start from a fresh array for this chain, then fill it with the
        // element type matching the current precision mode.
        noseHooverChainState[chainID] = ComputeArray();
        auto& chainArray = noseHooverChainState[chainID];
        if (!useDouble) {
            chainArray.initialize<mm_float2>(context, chainLength, "chainState" + to_string(chainID));
            vector<mm_float2> values(chainLength);
            stream.read((char*) values.data(), sizeof(mm_float2)*chainLength);
            chainArray.upload(values);
        }
        else {
            chainArray.initialize<mm_double2>(context, chainLength, "chainState" + to_string(chainID));
            vector<mm_double2> values(chainLength);
            stream.read((char*) values.data(), sizeof(mm_double2)*chainLength);
            chainArray.upload(values);
        }
    }

    // The random number state is only present once the generator exists.
    if (random.isInitialized()) {
        stream.read((char*) &randomPos, sizeof(int));
        vector<mm_float4> randomValues(random.getSize());
        stream.read((char*) randomValues.data(), sizeof(mm_float4)*random.getSize());
        random.upload(randomValues);
        vector<mm_int4> seedValues(randomSeed.getSize());
        stream.read((char*) seedValues.data(), sizeof(mm_int4)*randomSeed.getSize());
        randomSeed.upload(seedValues);
    }
}
double IntegrationUtilities::computeKineticEnergy(double timeShift) {
int numParticles = context.getNumAtoms();
if (timeShift != 0) {
// Copy the velocities into the posDelta array while we temporarily modify them.
context.getVelm().copyTo(posDelta);
// Apply the time shift.
timeShiftKernel->setArg(0, context.getVelm());
timeShiftKernel->setArg(1, context.getLongForceBuffer());
if (context.getUseDoublePrecision())
timeShiftKernel->setArg(2, timeShift);
else
timeShiftKernel->setArg(2, (float) timeShift);
timeShiftKernel->execute(numParticles);
applyConstraintsImpl(true, 1e-4);
}
// Compute the kinetic energy.
double energy = 0.0;
if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
vector<mm_double4> velm;
context.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) {
mm_double4 v = velm[i];
if (v.w != 0)
energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
}
}
else {
vector<mm_float4> velm;
context.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) {
mm_float4 v = velm[i];
if (v.w != 0)
energy += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
}
}
// Restore the velocities.
if (timeShift != 0)
posDelta.copyTo(context.getVelm());
return 0.5*energy;
}
......@@ -2,11 +2,11 @@
* Apply the Andersen thermostat to adjust particle velocities.
*/
extern "C" __global__ void applyAndersenThermostat(int numAtoms, float collisionFrequency, float kT, mixed4* velm, const mixed4* __restrict__ stepSize, const float4* __restrict__ random,
unsigned int randomIndex, const int* __restrict__ atomGroups) {
float collisionProbability = 1.0f-expf(-(float) (collisionFrequency*stepSize[0].y));
float randomRange = erff(collisionProbability/sqrtf(2.0f));
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
KERNEL void applyAndersenThermostat(int numAtoms, float collisionFrequency, float kT, GLOBAL mixed4* velm, real stepSize, GLOBAL const float4* RESTRICT random,
unsigned int randomIndex, GLOBAL const int* RESTRICT atomGroups) {
float collisionProbability = (float) (1-EXP(-collisionFrequency*stepSize));
float randomRange = (float) erf(collisionProbability/SQRT(2.0f));
for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
mixed4 velocity = velm[index];
float4 selectRand = random[randomIndex+atomGroups[index]];
float4 velRand = random[randomIndex+index];
......
real3 v0 = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
real3 v1 = make_real3(pos2.x-pos3.x, pos2.y-pos3.y, pos2.z-pos3.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
#endif
real3 cp = cross(v0, v1);
real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(SQRT(rp), (real) 1.0e-06f);
real r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
real r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
real dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
real cosine = min(max(dot*RSQRT(r21*r23), (real) -1), (real) 1);
real theta = ACOS(cosine);
COMPUTE_FORCE
real3 force1 = cross(v0, cp)*(dEdAngle/(r21*rp));
real3 force3 = cross(cp, v1)*(dEdAngle/(r23*rp));
real3 force2 = -force1-force3;
......@@ -4,11 +4,11 @@ enum {VelScale, NoiseScale};
* Perform the first part of BAOAB integration: velocity half step, then position half step.
*/
extern "C" __global__ void integrateBAOABPart1(int numAtoms, int paddedNumAtoms, mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta,
mixed4* __restrict__ oldDelta, const mixed2* __restrict__ dt) {
KERNEL void integrateBAOABPart1(int numAtoms, int paddedNumAtoms, GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL mixed4* RESTRICT posDelta,
GLOBAL mixed4* RESTRICT oldDelta, GLOBAL const mixed2* RESTRICT dt) {
mixed halfdt = 0.5*dt[0].y;
mixed fscale = halfdt/(mixed) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
velocity.x += fscale*velocity.w*force[index];
......@@ -27,13 +27,17 @@ extern "C" __global__ void integrateBAOABPart1(int numAtoms, int paddedNumAtoms,
* then position half step.
*/
extern "C" __global__ void integrateBAOABPart2(int numAtoms, real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ velm, mixed4* __restrict__ posDelta,
mixed4* __restrict__ oldDelta, const mixed* __restrict__ paramBuffer, const mixed2* __restrict__ dt, const float4* __restrict__ random, unsigned int randomIndex) {
KERNEL void integrateBAOABPart2(int numAtoms, GLOBAL real4* RESTRICT posq, GLOBAL mixed4* RESTRICT velm, GLOBAL mixed4* RESTRICT posDelta,
GLOBAL mixed4* RESTRICT oldDelta, GLOBAL const mixed* RESTRICT paramBuffer, GLOBAL const mixed2* RESTRICT dt, GLOBAL const float4* RESTRICT random, unsigned int randomIndex
#ifdef USE_MIXED_PRECISION
, GLOBAL real4* RESTRICT posqCorrection
#endif
) {
mixed vscale = paramBuffer[VelScale];
mixed noisescale = paramBuffer[NoiseScale];
mixed halfdt = 0.5*dt[0].y;
mixed invHalfdt = 1/halfdt;
int index = blockIdx.x*blockDim.x+threadIdx.x;
int index = GLOBAL_ID;
randomIndex += index;
while (index < numAtoms) {
mixed4 velocity = velm[index];
......@@ -67,8 +71,8 @@ extern "C" __global__ void integrateBAOABPart2(int numAtoms, real4* __restrict__
posDelta[index] = delta;
oldDelta[index] = delta;
}
randomIndex += blockDim.x*gridDim.x;
index += blockDim.x*gridDim.x;
randomIndex += GLOBAL_SIZE;
index += GLOBAL_SIZE;
}
}
......@@ -77,11 +81,15 @@ extern "C" __global__ void integrateBAOABPart2(int numAtoms, real4* __restrict__
* the constrained positions in preparation for computing forces.
*/
extern "C" __global__ void integrateBAOABPart3(int numAtoms, real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ velm,
mixed4* __restrict__ posDelta, mixed4* __restrict__ oldDelta, const mixed2* __restrict__ dt) {
KERNEL void integrateBAOABPart3(int numAtoms, GLOBAL real4* RESTRICT posq, GLOBAL mixed4* RESTRICT velm,
GLOBAL mixed4* RESTRICT posDelta, GLOBAL mixed4* RESTRICT oldDelta, GLOBAL const mixed2* RESTRICT dt
#ifdef USE_MIXED_PRECISION
, GLOBAL real4* RESTRICT posqCorrection
#endif
) {
mixed halfdt = 0.5*dt[0].y;
mixed invHalfdt = 1/halfdt;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
mixed4 delta = posDelta[index];
......@@ -113,11 +121,11 @@ extern "C" __global__ void integrateBAOABPart3(int numAtoms, real4* __restrict__
* Perform the fourth part of BAOAB integration: velocity half step.
*/
extern "C" __global__ void integrateBAOABPart4(int numAtoms, int paddedNumAtoms, mixed4* __restrict__ velm,
const long long* __restrict__ force, const mixed2* __restrict__ dt) {
KERNEL void integrateBAOABPart4(int numAtoms, int paddedNumAtoms, GLOBAL mixed4* RESTRICT velm,
GLOBAL const mm_long* RESTRICT force, GLOBAL const mixed2* RESTRICT dt) {
mixed halfdt = 0.5*dt[0].y;
mixed fscale = halfdt/(mixed) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
mixed4 velocity = velm[index];
if (velocity.w != 0.0) {
velocity.x += fscale*velocity.w*force[index];
......
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE
dEdR = (r > 0) ? (dEdR / r) : 0;
delta *= dEdR;
real3 force1 = delta;
real3 force2 = -delta;
......@@ -2,18 +2,18 @@
* Perform the first step of Brownian integration.
*/
// Fills posDelta[index].x/.y/.z for every index below numAtoms whose velm[index].w is non-zero.
// Each component is the sum of a force term (fscale * invMass * force) and a noise term
// (noiseAmplitude * SQRT(invMass) * one component of random[randomIndex]).
//   numAtoms        - exclusive upper bound of the index loop.
//   paddedNumAtoms  - offset between the x, y and z sections of the force array.
//   tauDeltaT       - numerator of fscale, the factor applied to the force term.
//   noiseAmplitude  - factor applied to the random term.
//   force           - 64-bit integer values, read at index, index+paddedNumAtoms and
//                     index+2*paddedNumAtoms.
//   posDelta        - output; only x, y and z are written, w is left untouched.
//   velm            - only the w component is read (held in a local named invMass).
//   random          - float4 values; x, y and z of element randomIndex are used.
//   randomIndex     - starting offset into random, shifted per thread below.
// NOTE(review): this span lists two variants of the signature, of the randomIndex setup and of
// the loop header/stride back to back (raw blockIdx/blockDim/threadIdx expressions, then the
// KERNEL/GLOBAL_ID/GLOBAL_SIZE macros). It looks like a before/after listing rather than a single
// compilable body - confirm which variant is live before editing.
extern "C" __global__ void integrateBrownianPart1(int numAtoms, int paddedNumAtoms, mixed tauDeltaT, mixed noiseAmplitude, const long long* __restrict__ force,
mixed4* __restrict__ posDelta, const mixed4* __restrict__ velm, const float4* __restrict__ random, unsigned int randomIndex) {
randomIndex += blockIdx.x*blockDim.x+threadIdx.x;
KERNEL void integrateBrownianPart1(int numAtoms, int paddedNumAtoms, mixed tauDeltaT, mixed noiseAmplitude, GLOBAL const mm_long* RESTRICT force,
GLOBAL mixed4* RESTRICT posDelta, GLOBAL const mixed4* RESTRICT velm, GLOBAL const float4* RESTRICT random, unsigned int randomIndex) {
// Give each thread its own starting slot in the random array.
randomIndex += GLOBAL_ID;
// 0x100000000 is 2^32; presumably the forces are stored as fixed-point integers scaled by
// that factor - TODO confirm against the code that accumulates them.
const mixed fscale = tauDeltaT/(mixed) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
mixed invMass = velm[index].w;
// Entries with a zero w component are skipped, so their posDelta is not modified here.
if (invMass != 0) {
posDelta[index].x = fscale*invMass*force[index] + noiseAmplitude*SQRT(invMass)*random[randomIndex].x;
posDelta[index].y = fscale*invMass*force[index+paddedNumAtoms] + noiseAmplitude*SQRT(invMass)*random[randomIndex].y;
posDelta[index].z = fscale*invMass*force[index+paddedNumAtoms*2] + noiseAmplitude*SQRT(invMass)*random[randomIndex].z;
}
// randomIndex advances on every iteration, including skipped entries, so it stays in step
// with index.
randomIndex += blockDim.x*gridDim.x;
randomIndex += GLOBAL_SIZE;
}
}
......@@ -21,9 +21,12 @@ extern "C" __global__ void integrateBrownianPart1(int numAtoms, int paddedNumAto
* Perform the second step of Brownian integration.
*/
extern "C" __global__ void integrateBrownianPart2(int numAtoms, mixed deltaT, real4* posq, real4* __restrict__ posqCorrection, mixed4* velm, const mixed4* __restrict__ posDelta) {
const mixed oneOverDeltaT = RECIP(deltaT);
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < numAtoms; index += blockDim.x*gridDim.x) {
KERNEL void integrateBrownianPart2(int numAtoms, mixed oneOverDeltaT, GLOBAL real4* posq, GLOBAL mixed4* velm, GLOBAL const mixed4* RESTRICT posDelta
#ifdef USE_MIXED_PRECISION
, GLOBAL real4* RESTRICT posqCorrection
#endif
) {
for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) {
if (velm[index].w != 0) {
mixed4 delta = posDelta[index];
velm[index].x = oneOverDeltaT*delta.x;
......
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
/**
* Compute the center of each group.
*/
__kernel void computeGroupCenters(__global const real4* restrict posq, __global const int* restrict groupParticles,
__global const real* restrict groupWeights, __global const int* restrict groupOffsets, __global real4* restrict centerPositions) {
__local volatile real3 temp[64];
for (int group = get_group_id(0); group < NUM_GROUPS; group += get_num_groups(0)) {
KERNEL void computeGroupCenters(int numParticleGroups, GLOBAL const real4* RESTRICT posq, GLOBAL const int* RESTRICT groupParticles,
GLOBAL const real* RESTRICT groupWeights, GLOBAL const int* RESTRICT groupOffsets, GLOBAL real4* RESTRICT centerPositions) {
LOCAL volatile real3 temp[64];
for (int group = GROUP_ID; group < numParticleGroups; group += NUM_GROUPS) {
// The threads in this block work together to compute the center of one group.
int firstIndex = groupOffsets[group];
int lastIndex = groupOffsets[group+1];
real3 center = (real3) 0;
for (int index = get_local_id(0); index < lastIndex-firstIndex; index += get_local_size(0)) {
real3 center = make_real3(0);
for (int index = LOCAL_ID; index < lastIndex-firstIndex; index += LOCAL_SIZE) {
int atom = groupParticles[firstIndex+index];
real weight = groupWeights[firstIndex+index];
real4 pos = posq[atom];
......@@ -23,18 +21,16 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
// Sum the values.
int thread = get_local_id(0);
int thread = LOCAL_ID;
temp[thread].x = center.x;
temp[thread].y = center.y;
temp[thread].z = center.z;
barrier(CLK_LOCAL_MEM_FENCE);
SYNC_THREADS;
if (thread < 32) {
temp[thread].x += temp[thread+32].x;
temp[thread].y += temp[thread+32].y;
temp[thread].z += temp[thread+32].z;
}
SYNC_WARPS;
if (thread < 16) {
temp[thread].x += temp[thread+16].x;
......@@ -47,7 +43,6 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
temp[thread].y += temp[thread+8].y;
temp[thread].z += temp[thread+8].z;
}
SYNC_WARPS;
if (thread < 4) {
temp[thread].x += temp[thread+4].x;
......@@ -60,19 +55,18 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
temp[thread].y += temp[thread+2].y;
temp[thread].z += temp[thread+2].z;
}
SYNC_WARPS;
if (thread == 0)
centerPositions[group] = (real4) (temp[0].x+temp[1].x, temp[0].y+temp[1].y, temp[0].z+temp[1].z, 0);
centerPositions[group] = make_real4(temp[0].x+temp[1].x, temp[0].y+temp[1].y, temp[0].z+temp[1].z, 0);
}
}
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
real4 delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
DEVICE real4 delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
if (periodic)
APPLY_PERIODIC_TO_DELTA(result);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
......@@ -82,65 +76,64 @@ real4 delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4
/**
* Compute the angle between two vectors. The w component of each vector should contain the squared magnitude.
*/
// Returns the angle between vec1 and vec2, computed from their x/y/z components. The w
// components are read as the squared magnitudes and are not recomputed here.
// NOTE(review): this span lists two variants of several lines back to back (an unqualified
// signature with cross() on real4 and asin()/acos(), then a DEVICE signature with trimTo3()
// and ASIN()/ACOS()). It looks like a before/after listing rather than one compilable body -
// confirm which variant is live before editing.
real computeAngle(real4 vec1, real4 vec2) {
DEVICE real computeAngle(real4 vec1, real4 vec2) {
real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
// Normalize the dot product by the product of the magnitudes, using the squared magnitudes in w.
real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real4 crossProduct = cross(vec1, vec2);
real3 crossProduct = cross(trimTo3(vec1), trimTo3(vec2));
real scale = vec1.w*vec2.w;
// |v1 x v2|^2 / (|v1|^2 |v2|^2) is sin^2 of the angle, so the square root is its sine.
angle = asin(SQRT(dot(crossProduct, crossProduct)/scale));
angle = ASIN(SQRT(dot(crossProduct, crossProduct)/scale));
// The sine is non-negative, so the arcsine never exceeds pi/2; reflect it for the obtuse case.
if (cosine < 0)
angle = M_PI-angle;
}
else
angle = acos(cosine);
angle = ACOS(cosine);
return angle;
}
/**
* Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
*/
// Returns a real4 whose x/y/z hold the cross product of vec1 and vec2 and whose w holds the
// squared length of that cross product.
// NOTE(review): two variants are listed back to back. The first calls cross() directly on the
// real4 arguments and then overwrites w. The second trims both arguments with trimTo3(), takes
// a real3 cross product and assembles the result with make_real4(). It looks like a
// before/after listing rather than one compilable body - confirm which variant is live.
real4 computeCross(real4 vec1, real4 vec2) {
real4 result = cross(vec1, vec2);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
DEVICE real4 computeCross(real4 vec1, real4 vec2) {
real3 cp = cross(trimTo3(vec1), trimTo3(vec2));
return make_real4(cp.x, cp.y, cp.z, cp.x*cp.x+cp.y*cp.y+cp.z*cp.z);
}
/**
* Compute the forces on groups based on the bonds.
*/
// Kernel skeleton: loops over bond indices from the thread's global id up to NUM_BONDS, expands
// COMPUTE_FORCE once per bond, then adds the thread's local `energy` into its own slot of
// energyBuffer.
// EXTRA_ARGS, INIT_PARAM_DERIVS, COMPUTE_FORCE and SAVE_PARAM_DERIVS are macros defined outside
// this view. Presumably they are filled in per force definition and are what read
// centerPositions, bondGroups and the periodic box arguments, write groupForce and update
// `energy`, since none of those are referenced in the visible lines - TODO confirm.
// NOTE(review): two variants of the signature, loop header and energyBuffer update are listed
// back to back (__kernel/get_global_id(0) form, then KERNEL/GLOBAL_ID form with an added leading
// numParticleGroups parameter and groupForce declared as mm_ulong). It looks like a
// before/after listing rather than one compilable body - confirm which variant is live.
__kernel void computeGroupForces(__global long* restrict groupForce, __global mixed* restrict energyBuffer, __global const real4* restrict centerPositions,
__global const int* restrict bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
KERNEL void computeGroupForces(int numParticleGroups, GLOBAL mm_ulong* RESTRICT groupForce, GLOBAL mixed* RESTRICT energyBuffer, GLOBAL const real4* RESTRICT centerPositions,
GLOBAL const int* RESTRICT bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) {
mixed energy = 0;
INIT_PARAM_DERIVS
for (int index = get_global_id(0); index < NUM_BONDS; index += get_global_size(0)) {
for (int index = GLOBAL_ID; index < NUM_BONDS; index += GLOBAL_SIZE) {
COMPUTE_FORCE
}
// Each thread accumulates into the energyBuffer element selected by its own global id.
energyBuffer[get_global_id(0)] += energy;
energyBuffer[GLOBAL_ID] += energy;
SAVE_PARAM_DERIVS
}
/**
* Apply the forces from the group centers to the individual atoms.
*/
// For every group, reads the group's three force components from groupForce and atomically adds
// each component, multiplied by the member's weight, to the matching atomForce entry of every
// member atom.
//   groupParticles - atom index for each (group, member) slot.
//   groupWeights   - weight for each slot, indexed the same way as groupParticles.
//   groupOffsets   - groupOffsets[g] .. groupOffsets[g+1] delimit group g's slots.
//   groupForce     - 64-bit integers in three consecutive sections (x, y, z), one entry per group.
//   atomForce      - 64-bit integers in three sections spaced PADDED_NUM_ATOMS apart.
// NOTE(review): two variants of most lines are listed back to back (__kernel/get_group_id(0)/
// atom_add form, then KERNEL/GROUP_ID/ATOMIC_ADD form). It looks like a before/after listing
// rather than one compilable body - confirm which variant is live.
// NOTE(review): NUM_GROUPS is the loop bound and section stride in the first variant but the loop
// increment in the second, where numParticleGroups takes over as bound and stride. Presumably it
// changes meaning from "number of particle groups" to "number of work-groups" - confirm.
__kernel void applyForcesToAtoms(__global const int* restrict groupParticles, __global const real* restrict groupWeights, __global const int* restrict groupOffsets,
__global const long* restrict groupForce, __global long* restrict atomForce) {
for (int group = get_group_id(0); group < NUM_GROUPS; group += get_num_groups(0)) {
long fx = groupForce[group];
long fy = groupForce[group+NUM_GROUPS];
long fz = groupForce[group+NUM_GROUPS*2];
KERNEL void applyForcesToAtoms(int numParticleGroups, GLOBAL const int* RESTRICT groupParticles, GLOBAL const real* RESTRICT groupWeights, GLOBAL const int* RESTRICT groupOffsets,
GLOBAL const mm_long* RESTRICT groupForce, GLOBAL mm_ulong* RESTRICT atomForce) {
// The outer loop is indexed by work-group id, the inner loop by local thread id.
for (int group = GROUP_ID; group < numParticleGroups; group += NUM_GROUPS) {
mm_long fx = groupForce[group];
mm_long fy = groupForce[group+numParticleGroups];
mm_long fz = groupForce[group+numParticleGroups*2];
int firstIndex = groupOffsets[group];
int lastIndex = groupOffsets[group+1];
for (int index = get_local_id(0); index < lastIndex-firstIndex; index += get_local_size(0)) {
for (int index = LOCAL_ID; index < lastIndex-firstIndex; index += LOCAL_SIZE) {
int atom = groupParticles[firstIndex+index];
real weight = groupWeights[firstIndex+index];
// The adds are atomic; presumably because the same atom can appear in more than one group -
// TODO confirm.
atom_add(&atomForce[atom], (long) (fx*weight));
atom_add(&atomForce[atom+PADDED_NUM_ATOMS], (long) (fy*weight));
atom_add(&atomForce[atom+2*PADDED_NUM_ATOMS], (long) (fz*weight));
// The weighted value is converted to a signed 64-bit integer first and only then cast to the
// unsigned type that atomForce is declared with in this variant.
ATOMIC_ADD(&atomForce[atom], (mm_ulong) ((mm_long) (fx*weight)));
ATOMIC_ADD(&atomForce[atom+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (fy*weight)));
ATOMIC_ADD(&atomForce[atom+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (fz*weight)));
}
}
}
/**
 * Build a real3 from the x, y and z components of a real4; the w component is discarded.
 */
inline __device__ real3 ccb_trim(real4 v) {
    real3 xyz = make_real3(v.x, v.y, v.z);
    return xyz;
}
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline __device__ real4 ccb_delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
DEVICE real4 ccb_delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
if (periodic)
......@@ -20,17 +13,17 @@ inline __device__ real4 ccb_delta(real4 vec1, real4 vec2, bool periodic, real4 p
/**
* Compute the angle between two vectors. The w component of each vector should contain the squared magnitude.
*/
__device__ real ccb_computeAngle(real4 vec1, real4 vec2) {
DEVICE real ccb_computeAngle(real4 vec1, real4 vec2) {
real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real3 crossProduct = cross(vec1, vec2);
real3 crossProduct = cross(trimTo3(vec1), trimTo3(vec2));
real scale = vec1.w*vec2.w;
angle = ASIN(SQRT(dot(crossProduct, crossProduct)/scale));
if (cosine < 0.0f)
if (cosine < 0)
angle = M_PI-angle;
}
else
......@@ -41,7 +34,8 @@ __device__ real ccb_computeAngle(real4 vec1, real4 vec2) {
/**
* Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
*/
// Returns a real4 whose x/y/z hold the cross product of vec1 and vec2 and whose w holds the
// squared length of that cross product (cp.x^2 + cp.y^2 + cp.z^2).
// NOTE(review): two variants of the signature and of the cross-product line are listed back to
// back (inline __device__ with cross() on the real4 arguments, then DEVICE with trimTo3()
// applied to both arguments first). It looks like a before/after listing rather than one
// compilable body - confirm which variant is live.
inline __device__ real4 ccb_computeCross(real4 vec1, real4 vec2) {
real3 cp = cross(vec1, vec2);
DEVICE real4 ccb_computeCross(real4 vec1, real4 vec2) {
real3 cp = cross(trimTo3(vec1), trimTo3(vec2));
return make_real4(cp.x, cp.y, cp.z, cp.x*cp.x+cp.y*cp.y+cp.z*cp.z);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment