Unverified Commit db4cefd4 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Vectorize nonbonded interactions with no cutoff (#3575)

* Vectorize NonbondedForce with no cutoff

* Vectorize CustomNonbondedForce with no cutoff

* Memory efficient dense neighbor list

* Fixed errors
parent 11a1982a
...@@ -48,7 +48,7 @@ public: ...@@ -48,7 +48,7 @@ public:
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuCustomNonbondedForce(ThreadPool& threads); CpuCustomNonbondedForce(ThreadPool& threads, const CpuNeighborList& neighbors);
void initialize(const Lepton::ParsedExpression& energyExpression, const Lepton::ParsedExpression& forceExpression, void initialize(const Lepton::ParsedExpression& energyExpression, const Lepton::ParsedExpression& forceExpression,
const std::vector<std::string>& parameterNames, const std::vector<std::set<int> >& exclusions, const std::vector<std::string>& parameterNames, const std::vector<std::set<int> >& exclusions,
...@@ -68,11 +68,10 @@ public: ...@@ -68,11 +68,10 @@ public:
Set the force to use a cutoff. Set the force to use a cutoff.
@param distance the cutoff distance @param distance the cutoff distance
@param neighbors the neighbor list to use
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void setUseCutoff(double distance, const CpuNeighborList& neighbors); void setUseCutoff(double distance);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -212,7 +211,7 @@ public: ...@@ -212,7 +211,7 @@ public:
/** /**
* This function is called to create an instance of an appropriate subclass for the current CPU. * This function is called to create an instance of an appropriate subclass for the current CPU.
*/ */
CpuCustomNonbondedForce* createCpuCustomNonbondedForce(ThreadPool& threads); CpuCustomNonbondedForce* createCpuCustomNonbondedForce(ThreadPool& threads, const CpuNeighborList& neighbors);
} // namespace OpenMM } // namespace OpenMM
......
...@@ -29,12 +29,12 @@ ...@@ -29,12 +29,12 @@
namespace OpenMM { namespace OpenMM {
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic}; enum PeriodicType {NoCutoff, NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
template <typename FVEC, int BLOCK_SIZE> template <typename FVEC, int BLOCK_SIZE>
class CpuCustomNonbondedForceFvec : public CpuCustomNonbondedForce { class CpuCustomNonbondedForceFvec : public CpuCustomNonbondedForce {
public: public:
CpuCustomNonbondedForceFvec(ThreadPool& threads) : CpuCustomNonbondedForce(threads) { CpuCustomNonbondedForceFvec(ThreadPool& threads, const CpuNeighborList& neighbors) : CpuCustomNonbondedForce(threads, neighbors) {
} }
protected: protected:
...@@ -101,7 +101,9 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxn(ThreadData ...@@ -101,7 +101,9 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxn(ThreadData
// Call the appropriate version depending on what calculation is required for periodic boundary conditions. // Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic) if (!cutoff)
calculateBlockIxnImpl<NoCutoff>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter); calculateBlockIxnImpl<NoPeriodic>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom) else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter); calculateBlockIxnImpl<PeriodicPerAtom>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
...@@ -137,13 +139,12 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxnImpl(Thread ...@@ -137,13 +139,12 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxnImpl(Thread
// Loop over neighbors for this block. // Loop over neighbors for this block.
const auto& neighbors = neighborList->getBlockNeighbors(blockIndex); CpuNeighborList::NeighborIterator neighbors = neighborList->getNeighborIterator(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
FVEC partialEnergy = {}; FVEC partialEnergy = {};
for (int i = 0; i < neighbors.size(); i++) { while (neighbors.next()) {
// Load the next neighbor. // Load the next neighbor.
int atom = neighbors[i]; int atom = neighbors.getNeighbor();
for (int j = 0; j < numParams; j++) for (int j = 0; j < numParams; j++)
for (int k = 0; k < BLOCK_SIZE; k++) for (int k = 0; k < BLOCK_SIZE; k++)
data.vecParticle2Params[j*BLOCK_SIZE+k] = atomParameters[atom][j]; data.vecParticle2Params[j*BLOCK_SIZE+k] = atomParameters[atom][j];
...@@ -158,8 +159,9 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxnImpl(Thread ...@@ -158,8 +159,9 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxnImpl(Thread
if (PERIODIC_TYPE == PeriodicPerAtom) if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize; atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize); getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize);
const auto exclNotMask = FVEC::expandBitsToMask(~exclusions[i]); auto include = FVEC::expandBitsToMask(~neighbors.getExclusions());
const auto include = blendZero(r2 < cutoffDistanceSquared, exclNotMask); if (PERIODIC_TYPE != NoCutoff)
include = blendZero(r2 < cutoffDistanceSquared, include);
if (!any(include)) if (!any(include))
continue; // No interactions to compute. continue; // No interactions to compute.
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. * * Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -46,11 +46,34 @@ namespace OpenMM { ...@@ -46,11 +46,34 @@ namespace OpenMM {
class OPENMM_EXPORT_CPU CpuNeighborList { class OPENMM_EXPORT_CPU CpuNeighborList {
public: public:
class Voxels; class Voxels;
class NeighborIterator;
CpuNeighborList(int blockSize); CpuNeighborList(int blockSize);
/**
* Compute the neighbor list based on the current positions of atoms.
*
* @param numAtoms the number of atoms in the system
* @param atomLocations the positions of the atoms
* @param exclusions exclusions[i] contains the indices of all atoms with which atom i should not interact
* @param periodicBoxVectors the current periodic box vectors
* @param usePeriodic whether to apply periodic boundary conditions
* @param maxDistance the neighbor list will contain all pairs that are within this distance of each other
* @param threads used for parallelization
*/
void computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const std::vector<std::set<int> >& exclusions, void computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const std::vector<std::set<int> >& exclusions,
const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads); const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads);
/**
* Build a dense neighbor list, in which every atom interacts with every other (except exclusions), regardless of distance.
*
* @param numAtoms the number of atoms in the system
* @param exclusions exclusions[i] contains the indices of all atoms with which atom i should not interact
*/
void createDenseNeighborList(int numAtoms, const std::vector<std::set<int> >& exclusions);
int getNumBlocks() const; int getNumBlocks() const;
int getBlockSize() const; int getBlockSize() const;
/**
* Get an object for iterating over the neighbors of an atom block.
*/
NeighborIterator getNeighborIterator(int blockIndex) const;
const std::vector<int32_t>& getSortedAtoms() const; const std::vector<int32_t>& getSortedAtoms() const;
const std::vector<int>& getBlockNeighbors(int blockIndex) const; const std::vector<int>& getBlockNeighbors(int blockIndex) const;
...@@ -71,7 +94,7 @@ private: ...@@ -71,7 +94,7 @@ private:
int blockSize; int blockSize;
std::vector<int> sortedAtoms; std::vector<int> sortedAtoms;
std::vector<float> sortedPositions; std::vector<float> sortedPositions;
std::vector<std::vector<int> > blockNeighbors; std::vector<std::vector<int> > blockNeighbors, blockExclusionIndices;
std::vector<std::vector<BlockExclusionMask> > blockExclusions; std::vector<std::vector<BlockExclusionMask> > blockExclusions;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
float minx, maxx, miny, maxy, minz, maxz; float minx, maxx, miny, maxy, minz, maxz;
...@@ -81,11 +104,46 @@ private: ...@@ -81,11 +104,46 @@ private:
const float* atomLocations; const float* atomLocations;
Vec3 periodicBoxVectors[3]; Vec3 periodicBoxVectors[3];
int numAtoms; int numAtoms;
bool usePeriodic; bool usePeriodic, dense;
float maxDistance; float maxDistance;
std::atomic<int> atomicCounter; std::atomic<int> atomicCounter;
}; };
class OPENMM_EXPORT_CPU CpuNeighborList::NeighborIterator {
public:
/**
* This constructor is used for standard neighbor lists. Do not call it directly. Obtain a
* NeighborIterator by calling getNeighborIterator() on a neighbor list.
*/
NeighborIterator(const std::vector<int>& neighbors, const std::vector<BlockExclusionMask>& exclusions);
/**
* This constructor is used for dense neighbor lists. Do not call it directly. Obtain a
* NeighborIterator by calling getNeighborIterator() on a neighbor list.
*/
NeighborIterator(int firstAtom, int lastAtom, const std::vector<int>& exclusionIndices, const std::vector<BlockExclusionMask>& exclusions);
/**
* Advance the iterator to the next neighbor.
*
* @return false if there are no more neighbors, true otherwise.
*/
bool next();
/**
* Get the index of the current neighbor.
*/
int getNeighbor() const;
/**
* Get bit flags marking which atoms in the block the current atom is excluded from interacting with.
*/
BlockExclusionMask getExclusions() const;
private:
bool dense;
int currentAtom, currentIndex, lastAtom;
BlockExclusionMask currentExclusions;
const std::vector<int>* neighbors;
const std::vector<int>* exclusionIndices;
const std::vector<BlockExclusionMask>* exclusions;
};
} // namespace OpenMM } // namespace OpenMM
#endif // OPENMM_CPU_NEIGHBORLIST_H_ #endif // OPENMM_CPU_NEIGHBORLIST_H_
...@@ -44,10 +44,12 @@ class CpuNonbondedForce { ...@@ -44,10 +44,12 @@ class CpuNonbondedForce {
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Constructor Constructor
@param neighbors the neighbor list to use
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce(); CpuNonbondedForce(const CpuNeighborList& neighbors);
/** /**
* Virtual destructor. * Virtual destructor.
...@@ -60,12 +62,11 @@ class CpuNonbondedForce { ...@@ -60,12 +62,11 @@ class CpuNonbondedForce {
Set the force to use a cutoff. Set the force to use a cutoff.
@param distance the cutoff distance @param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent @param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void setUseCutoff(float distance, const CpuNeighborList& neighbors, float solventDielectric); void setUseCutoff(float distance, float solventDielectric);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -213,19 +214,6 @@ protected: ...@@ -213,19 +214,6 @@ protected:
static const float TWO_OVER_SQRT_PI; static const float TWO_OVER_SQRT_PI;
static const int NUM_TABLE_POINTS; static const int NUM_TABLE_POINTS;
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn between two atoms
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void calculateOneIxn(int atom1, int atom2, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
namespace OpenMM { namespace OpenMM {
enum BlockType {EWALD, NON_EWALD}; // :TODO: Better name for non-ewald. enum BlockType {EWALD, NON_EWALD}; // :TODO: Better name for non-ewald.
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic}; enum PeriodicType {NoCutoff, NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
/** /**
* Generic SIMD implementation of CpuNonbondedForce. The templating allows the same * Generic SIMD implementation of CpuNonbondedForce. The templating allows the same
...@@ -50,6 +50,10 @@ public: ...@@ -50,6 +50,10 @@ public:
* Store how many elements are contained in each block of atoms. * Store how many elements are contained in each block of atoms.
*/ */
static constexpr int blockSize = sizeof(FVEC) / sizeof(float); static constexpr int blockSize = sizeof(FVEC) / sizeof(float);
/**
* Constructor.
*/
CpuNonbondedForceFvec(const CpuNeighborList& neighbors);
protected: protected:
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -95,6 +99,10 @@ protected: ...@@ -95,6 +99,10 @@ protected:
}; };
template <typename FVEC>
CpuNonbondedForceFvec<FVEC>::CpuNonbondedForceFvec(const CpuNeighborList& neighbors) : CpuNonbondedForce(neighbors) {
}
/** /**
* Use a table lookup to approximate a function specific function. * Use a table lookup to approximate a function specific function.
*/ */
...@@ -166,7 +174,9 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnHandler(int blockIndex, float ...@@ -166,7 +174,9 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnHandler(int blockIndex, float
} }
// Call the appropriate version depending on what calculation is required for periodic boundary conditions. // Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic) if (!cutoff)
calculateBlockIxnImpl<NoCutoff, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter); calculateBlockIxnImpl<NoPeriodic, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom) else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter); calculateBlockIxnImpl<PeriodicPerAtom, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
...@@ -181,7 +191,7 @@ template <int PERIODIC_TYPE, BlockType BLOCK_TYPE> ...@@ -181,7 +191,7 @@ template <int PERIODIC_TYPE, BlockType BLOCK_TYPE>
void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) { void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block. // Load the positions and parameters of the atoms in the block.
const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize * blockIndex]; const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
fvec4 blockAtomPosq[blockSize]; fvec4 blockAtomPosq[blockSize];
FVEC blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f); FVEC blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
FVEC blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge; FVEC blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
...@@ -198,8 +208,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f ...@@ -198,8 +208,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
// the cycles are spent anyway. // the cycles are spent anyway.
FVEC blockAtomSigma = {}; FVEC blockAtomSigma = {};
FVEC blockAtomEpsilon = {}; FVEC blockAtomEpsilon = {};
for (int i=0; i<blockSize; ++i) for (int i = 0; i < blockSize; ++i) {
{
((float*)&blockAtomSigma)[i] = atomParameters[blockAtom[i]].first; ((float*)&blockAtomSigma)[i] = atomParameters[blockAtom[i]].first;
((float*)&blockAtomEpsilon)[i] = atomParameters[blockAtom[i]].second; ((float*)&blockAtomEpsilon)[i] = atomParameters[blockAtom[i]].second;
} }
...@@ -211,25 +220,23 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f ...@@ -211,25 +220,23 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance; const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance;
// Loop over neighbors for this block. // Loop over neighbors for this block.
const auto& neighbors = neighborList->getBlockNeighbors(blockIndex); CpuNeighborList::NeighborIterator neighbors = neighborList->getNeighborIterator(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
FVEC partialEnergy = {}; FVEC partialEnergy = {};
while (neighbors.next()) {
for (int i = 0; i < (int) neighbors.size(); i++) {
// Load the next neighbor. // Load the next neighbor.
int atom = neighbors[i]; int atom = neighbors.getNeighbor();
// Compute the distances to the block atoms. // Compute the distances to the block atoms.
FVEC dx, dy, dz, r2; FVEC dx, dy, dz, r2;
fvec4 atomPos(posq+4*atom); fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom) if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize; atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize); getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize);
auto include = FVEC::expandBitsToMask(~neighbors.getExclusions());
const auto exclNotMask = FVEC::expandBitsToMask(~exclusions[i]); if (PERIODIC_TYPE != NoCutoff)
const auto include = blendZero(r2 < cutoffDistanceSquared, exclNotMask); include = blendZero(r2 < cutoffDistanceSquared, include);
if (!any(include)) if (!any(include))
continue; // No interactions to compute. continue; // No interactions to compute.
...@@ -270,12 +277,10 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f ...@@ -270,12 +277,10 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
dEdR = 0.0f; dEdR = 0.0f;
} }
const auto chargeProd = blockAtomCharge*posq[4*atom+3]; const auto chargeProd = blockAtomCharge*posq[4*atom+3];
if (BLOCK_TYPE == BlockType::EWALD) if (BLOCK_TYPE == BlockType::EWALD) {
{
dEdR += chargeProd*inverseR*approximateFunctionFromTable(ewaldScaleTable, r, FVEC(ewaldDXInv)); dEdR += chargeProd*inverseR*approximateFunctionFromTable(ewaldScaleTable, r, FVEC(ewaldDXInv));
} }
else else {
{
if (cutoff) if (cutoff)
dEdR += chargeProd*(inverseR-2.0f*krf*r2); dEdR += chargeProd*(inverseR-2.0f*krf*r2);
else else
...@@ -287,8 +292,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f ...@@ -287,8 +292,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
if (totalEnergy) { if (totalEnergy) {
if (BLOCK_TYPE == BlockType::EWALD) if (BLOCK_TYPE == BlockType::EWALD)
energy += chargeProd*inverseR*approximateFunctionFromTable(erfcTable, alphaEwald*r, FVEC(erfcDXInv)); energy += chargeProd*inverseR*approximateFunctionFromTable(erfcTable, alphaEwald*r, FVEC(erfcDXInv));
else // Non-ewald. else { // Non-ewald.
{
if (cutoff) if (cutoff)
energy += chargeProd*(inverseR+krf*r2-crf); energy += chargeProd*(inverseR+krf*r2-crf);
else else
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. * * Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -91,6 +91,18 @@ class CpuPlatform::PlatformData { ...@@ -91,6 +91,18 @@ class CpuPlatform::PlatformData {
public: public:
PlatformData(int numParticles, int numThreads, bool deterministicForces); PlatformData(int numParticles, int numThreads, bool deterministicForces);
~PlatformData(); ~PlatformData();
/**
* Request that a neighbor list be built and maintained.
*
* @param cutoffDistance the cutoff distance for particle pairs to be included in the neighbor list.
* If this is 0, a dense neighbor list is built that includes all particle pairs
* regardless of distance.
* @param padding a padding distance that should be added to the cutoff so the neighbor list
* does not need to be rebuilt every step
* @param useExclusions whether to omit specific excluded interactions
* @param exclusionList if useExclusions is true, exclusionList[i] should contain the indices of all
* particles with which particle i should not interact
*/
void requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const std::vector<std::set<int> >& exclusionList); void requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const std::vector<std::set<int> >& exclusionList);
int requestPosqIndex(); int requestPosqIndex();
AlignedArray<float> posq; AlignedArray<float> posq;
...@@ -99,6 +111,7 @@ public: ...@@ -99,6 +111,7 @@ public:
bool isPeriodic; bool isPeriodic;
CpuRandom random; CpuRandom random;
std::map<std::string, std::string> propertyValues; std::map<std::string, std::string> propertyValues;
int numParticles;
CpuNeighborList* neighborList; CpuNeighborList* neighborList;
double cutoff, paddedCutoff; double cutoff, paddedCutoff;
bool anyExclusions, deterministicForces; bool anyExclusions, deterministicForces;
......
...@@ -94,7 +94,8 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const CompiledExpression& energy ...@@ -94,7 +94,8 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const CompiledExpression& energy
} }
} }
CpuCustomNonbondedForce::CpuCustomNonbondedForce(ThreadPool& threads) : cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), threads(threads) { CpuCustomNonbondedForce::CpuCustomNonbondedForce(ThreadPool& threads, const CpuNeighborList& neighbors) : cutoff(false), useSwitch(false),
periodic(false), useInteractionGroups(false), threads(threads), neighborList(&neighbors) {
} }
void CpuCustomNonbondedForce::initialize(const ParsedExpression& energyExpression, void CpuCustomNonbondedForce::initialize(const ParsedExpression& energyExpression,
...@@ -123,10 +124,9 @@ CpuCustomNonbondedForce::~CpuCustomNonbondedForce() { ...@@ -123,10 +124,9 @@ CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
delete data; delete data;
} }
void CpuCustomNonbondedForce::setUseCutoff(double distance, const CpuNeighborList& neighbors) { void CpuCustomNonbondedForce::setUseCutoff(double distance) {
cutoff = true; cutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
neighborList = &neighbors;
} }
void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) { void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) {
...@@ -280,28 +280,28 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -280,28 +280,28 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize); calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize);
} }
} }
else if (cutoff) { else {
// We are using a cutoff, so get the interactions from the neighbor list. // Get the interactions from the neighbor list.
while (true) { while (true) {
int blockIndex = atomicCounter++; int blockIndex = atomicCounter++;
if (blockIndex >= neighborList->getNumBlocks()) if (blockIndex >= neighborList->getNumBlocks())
break; break;
const int blockSize = neighborList->getBlockSize();
const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
if (data.energyParamDerivs.size() == 0) if (data.energyParamDerivs.size() == 0)
calculateBlockIxn(data, blockIndex, forces, energy, boxSize, invBoxSize); calculateBlockIxn(data, blockIndex, forces, energy, boxSize, invBoxSize);
else { else {
for (int i = 0; i < (int) neighbors.size(); i++) { const int blockSize = neighborList->getBlockSize();
int first = neighbors[i]; const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
CpuNeighborList::NeighborIterator neighbors = neighborList->getNeighborIterator(blockIndex);
while (neighbors.next()) {
int first = neighbors.getNeighbor();
for (int j = 0; j < paramNames.size(); j++) for (int j = 0; j < paramNames.size(); j++)
data.particleParam[j*2] = atomParameters[first][j]; data.particleParam[j*2] = atomParameters[first][j];
for (int j = 0; j < computedValueNames.size(); j++) for (int j = 0; j < computedValueNames.size(); j++)
data.computedValues[j*2] = atomComputedValues[j][first]; data.computedValues[j*2] = atomComputedValues[j][first];
auto exclusions = neighbors.getExclusions();
for (int k = 0; k < blockSize; k++) { for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) { if ((exclusions & (1<<k)) == 0) {
int second = blockAtom[k]; int second = blockAtom[k];
for (int j = 0; j < paramNames.size(); j++) for (int j = 0; j < paramNames.size(); j++)
data.particleParam[j*2+1] = atomParameters[second][j]; data.particleParam[j*2+1] = atomParameters[second][j];
...@@ -314,28 +314,6 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -314,28 +314,6 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
} }
} }
} }
else {
// Every particle interacts with every other one.
while (true) {
int ii = atomicCounter++;
if (ii >= numberOfAtoms)
break;
for (int jj = ii+1; jj < numberOfAtoms; jj++) {
if (exclusions[jj].find(ii) == exclusions[jj].end()) {
for (int j = 0; j < paramNames.size(); j++) {
data.particleParam[j*2] = atomParameters[ii][j];
data.particleParam[j*2+1] = atomParameters[jj][j];
}
for (int j = 0; j < computedValueNames.size(); j++) {
data.computedValues[j*2] = atomComputedValues[j][ii];
data.computedValues[j*2+1] = atomComputedValues[j][jj];
}
calculateOneIxn(ii, jj, data, forces, energy, boxSize, invBoxSize);
}
}
}
}
} }
void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data, void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
......
...@@ -25,15 +25,17 @@ ...@@ -25,15 +25,17 @@
#include "CpuCustomNonbondedForceFvec.h" #include "CpuCustomNonbondedForceFvec.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
using namespace OpenMM;
#ifdef __AVX__ #ifdef __AVX__
#include "openmm/internal/vectorizeAvx.h" #include "openmm/internal/vectorizeAvx.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads) { CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(ThreadPool& threads, const CpuNeighborList& neighbors) {
return new OpenMM::CpuCustomNonbondedForceFvec<fvec8, 8>(threads); return new CpuCustomNonbondedForceFvec<fvec8, 8>(threads, neighbors);
} }
#else #else
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads) { CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(ThreadPool& threads, const CpuNeighborList& neighbors) {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support"); throw OpenMMException("Internal error: OpenMM was compiled without AVX support");
} }
#endif #endif
...@@ -25,12 +25,14 @@ ...@@ -25,12 +25,14 @@
#include "CpuCustomNonbondedForceFvec.h" #include "CpuCustomNonbondedForceFvec.h"
#include "openmm/internal/hardware.h" #include "openmm/internal/hardware.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(OpenMM::ThreadPool& threads); using namespace OpenMM;
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads);
OpenMM::CpuCustomNonbondedForce* OpenMM::createCpuCustomNonbondedForce(OpenMM::ThreadPool& threads) { CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(ThreadPool& threads, const CpuNeighborList& neighbors);
CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(ThreadPool& threads, const CpuNeighborList& neighbors);
CpuCustomNonbondedForce* OpenMM::createCpuCustomNonbondedForce(ThreadPool& threads, const CpuNeighborList& neighbors) {
if (isAvxSupported()) if (isAvxSupported())
return createCpuCustomNonbondedForceAvx(threads); return createCpuCustomNonbondedForceAvx(threads, neighbors);
else else
return createCpuCustomNonbondedForceVec4(threads); return createCpuCustomNonbondedForceVec4(threads, neighbors);
} }
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#include "CpuCustomNonbondedForceFvec.h" #include "CpuCustomNonbondedForceFvec.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(OpenMM::ThreadPool& threads) { using namespace OpenMM;
return new OpenMM::CpuCustomNonbondedForceFvec<fvec4, 4>(threads);
CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(ThreadPool& threads, const CpuNeighborList& neighbors) {
return new CpuCustomNonbondedForceFvec<fvec4, 4>(threads, neighbors);
} }
...@@ -76,9 +76,15 @@ CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) { ...@@ -76,9 +76,15 @@ CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) {
particleExclusions[e.particle2].insert(e.particle1); particleExclusions[e.particle2].insert(e.particle1);
} }
nonbondedMethod = force.getNonbondedMethod(); nonbondedMethod = force.getNonbondedMethod();
cutoffDistance = force.getCutoffDistance(); if (nonbondedMethod == GayBerneForce::NoCutoff) {
switchingDistance = force.getSwitchingDistance(); cutoffDistance = 0.0;
useSwitchingFunction = force.getUseSwitchingFunction(); useSwitchingFunction = false;
}
else {
cutoffDistance = force.getCutoffDistance();
switchingDistance = force.getSwitchingDistance();
useSwitchingFunction = force.getUseSwitchingFunction();
}
// Allocate workspace for calculations. // Allocate workspace for calculations.
...@@ -157,7 +163,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex, ...@@ -157,7 +163,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
// Compute this thread's subset of interactions. // Compute this thread's subset of interactions.
if (neighborList == NULL) { if (cutoffDistance == 0.0) {
while (true) { while (true) {
int i = atomicCounter++; int i = atomicCounter++;
if (i >= numParticles) if (i >= numParticles)
......
...@@ -233,7 +233,7 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -233,7 +233,7 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
// Determine whether we need to recompute the neighbor list. // Determine whether we need to recompute the neighbor list.
if (data.neighborList != NULL) { if (data.neighborList != NULL && data.cutoff > 0.0) {
double padding = data.paddedCutoff-data.cutoff;; double padding = data.paddedCutoff-data.cutoff;;
bool needRecompute = false; bool needRecompute = false;
double closeCutoff2 = 0.25*padding*padding; double closeCutoff2 = 0.25*padding*padding;
...@@ -474,11 +474,10 @@ private: ...@@ -474,11 +474,10 @@ private:
int numParticles; int numParticles;
}; };
CpuNonbondedForce* createCpuNonbondedForceVec(); CpuNonbondedForce* createCpuNonbondedForceVec(const CpuNeighborList& neighbors);
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform), CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) { data(data), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
nonbonded = createCpuNonbondedForceVec();
} }
CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() { CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
...@@ -581,8 +580,10 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -581,8 +580,10 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod()); nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedCutoff = force.getCutoffDistance(); nonbondedCutoff = force.getCutoffDistance();
if (nonbondedMethod == NoCutoff) if (nonbondedMethod == NoCutoff) {
data.requestNeighborList(0.0, 0.0, true, exclusions);
useSwitchingFunction = false; useSwitchingFunction = false;
}
else { else {
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions); data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions);
useSwitchingFunction = force.getUseSwitchingFunction(); useSwitchingFunction = force.getUseSwitchingFunction();
...@@ -616,6 +617,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -616,6 +617,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
data.isPeriodic |= (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME); data.isPeriodic |= (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
nonbonded = createCpuNonbondedForceVec(*data.neighborList);
} }
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
...@@ -661,7 +663,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -661,7 +663,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME); bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff) if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric); nonbonded->setUseCutoff(nonbondedCutoff, rfDielectric);
if (data.isPeriodic) { if (data.isPeriodic) {
Vec3* boxVectors = extractBoxVectors(context); Vec3* boxVectors = extractBoxVectors(context);
double minAllowedSize = 1.999999*nonbondedCutoff; double minAllowedSize = 1.999999*nonbondedCutoff;
...@@ -876,8 +878,10 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C ...@@ -876,8 +878,10 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
force.getParticleParameters(i, particleParamArray[i]); force.getParticleParameters(i, particleParamArray[i]);
nonbondedMethod = CalcCustomNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod()); nonbondedMethod = CalcCustomNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedCutoff = force.getCutoffDistance(); nonbondedCutoff = force.getCutoffDistance();
if (nonbondedMethod == NoCutoff) if (nonbondedMethod == NoCutoff) {
data.requestNeighborList(0.0, 0.0, true, exclusions);
useSwitchingFunction = false; useSwitchingFunction = false;
}
else { else {
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions); data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions);
useSwitchingFunction = force.getUseSwitchingFunction(); useSwitchingFunction = force.getUseSwitchingFunction();
...@@ -968,7 +972,7 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF ...@@ -968,7 +972,7 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF
// Create the object that computes the interaction. // Create the object that computes the interaction.
nonbonded = createCpuCustomNonbondedForce(data.threads); nonbonded = createCpuCustomNonbondedForce(data.threads, *data.neighborList);
nonbonded->initialize(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions, nonbonded->initialize(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions,
computedValueNames, computedValueExpressions); computedValueNames, computedValueExpressions);
if (interactionGroups.size() > 0) if (interactionGroups.size() > 0)
...@@ -982,7 +986,7 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc ...@@ -982,7 +986,7 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc
double energy = 0; double energy = 0;
bool periodic = (nonbondedMethod == CutoffPeriodic); bool periodic = (nonbondedMethod == CutoffPeriodic);
if (nonbondedMethod != NoCutoff) if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList); nonbonded->setUseCutoff(nonbondedCutoff);
if (periodic) { if (periodic) {
double minAllowedSize = 2*nonbondedCutoff; double minAllowedSize = 2*nonbondedCutoff;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize) if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. * * Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -418,6 +418,7 @@ CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) { ...@@ -418,6 +418,7 @@ CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions, void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) { const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
dense = false;
int numBlocks = (numAtoms+blockSize-1)/blockSize; int numBlocks = (numAtoms+blockSize-1)/blockSize;
blockNeighbors.resize(numBlocks); blockNeighbors.resize(numBlocks);
blockExclusions.resize(numBlocks); blockExclusions.resize(numBlocks);
...@@ -497,6 +498,58 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float ...@@ -497,6 +498,58 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
} }
} }
void CpuNeighborList::createDenseNeighborList(int numAtoms, const vector<set<int> >& exclusions) {
dense = true;
this->numAtoms = numAtoms;
int numBlocks = (numAtoms+blockSize-1)/blockSize;
blockExclusionIndices.resize(numBlocks);
blockExclusions.resize(numBlocks);
sortedAtoms.resize(numAtoms);
for (int i = 0; i < numAtoms; i++)
sortedAtoms[i] = i;
for (int i = 0; i < numBlocks; i++) {
// Build the list of exclusions for this block.
int firstIndex = blockSize*i;
int atomsInBlock = min(blockSize, numAtoms-firstIndex);
map<int, int> exclusionMap;
for (int j = 0; j < atomsInBlock; j++) {
exclusionMap[firstIndex+j] = (1<<(j+1))-1;
}
for (int j = 0; j < atomsInBlock; j++) {
const set<int>& atomExclusions = exclusions[firstIndex+j];
const BlockExclusionMask mask = 1<<j;
for (int exclusion : atomExclusions) {
if (firstIndex <= exclusion) {
auto thisAtomFlags = exclusionMap.find(exclusion);
if (thisAtomFlags == exclusionMap.end())
exclusionMap[exclusion] = mask;
else
thisAtomFlags->second |= mask;
}
}
}
blockExclusionIndices[i].clear();
blockExclusions[i].clear();
for (auto flags : exclusionMap) {
blockExclusionIndices[i].push_back(flags.first);
blockExclusions[i].push_back(flags.second);
}
}
// Add padding atoms to fill up the last block.
int numPadding = numBlocks*blockSize-numAtoms;
if (numPadding > 0) {
const BlockExclusionMask mask = (~0) << (blockSize - numPadding);
for (int i = 0; i < numPadding; i++)
sortedAtoms.push_back(0);
auto& exc = blockExclusions.back();
for (int i = 0; i < (int) exc.size(); i++)
exc[i] |= mask;
}
}
int CpuNeighborList::getNumBlocks() const { int CpuNeighborList::getNumBlocks() const {
return sortedAtoms.size()/blockSize; return sortedAtoms.size()/blockSize;
} }
...@@ -518,6 +571,13 @@ const std::vector<CpuNeighborList::BlockExclusionMask>& CpuNeighborList::getBloc ...@@ -518,6 +571,13 @@ const std::vector<CpuNeighborList::BlockExclusionMask>& CpuNeighborList::getBloc
} }
CpuNeighborList::NeighborIterator CpuNeighborList::getNeighborIterator(int blockIndex) const {
if (dense)
return NeighborIterator(blockIndex*blockSize, numAtoms, blockExclusionIndices[blockIndex], blockExclusions[blockIndex]);
else
return NeighborIterator(blockNeighbors[blockIndex], blockExclusions[blockIndex]);
}
void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) { void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
// Compute the positions of atoms along the Hilbert curve. // Compute the positions of atoms along the Hilbert curve.
...@@ -599,4 +659,40 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI ...@@ -599,4 +659,40 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
} }
} }
CpuNeighborList::NeighborIterator::NeighborIterator(const vector<int>& neighbors, const vector<BlockExclusionMask>& exclusions) :
dense(false), neighbors(&neighbors), exclusions(&exclusions), currentIndex(-1) {
}
CpuNeighborList::NeighborIterator::NeighborIterator(int firstAtom, int lastAtom, const vector<int>& exclusionIndices, const vector<BlockExclusionMask>& exclusions) :
dense(true), currentAtom(firstAtom-1), lastAtom(lastAtom), exclusionIndices(&exclusionIndices), exclusions(&exclusions), currentIndex(0) {
}
bool CpuNeighborList::NeighborIterator::next() {
if (dense) {
if (++currentAtom >= lastAtom)
return false;
if (currentIndex < exclusionIndices->size() && (*exclusionIndices)[currentIndex] == currentAtom)
currentExclusions = (*exclusions)[currentIndex++];
else
currentExclusions = 0;
return true;
}
else {
if (++currentIndex < neighbors->size()) {
currentAtom = (*neighbors)[currentIndex];
currentExclusions = (*exclusions)[currentIndex];
return true;
}
return false;
}
}
int CpuNeighborList::NeighborIterator::getNeighbor() const {
return currentAtom;
}
CpuNeighborList::BlockExclusionMask CpuNeighborList::NeighborIterator::getExclusions() const {
return currentExclusions;
}
} // namespace OpenMM } // namespace OpenMM
...@@ -47,8 +47,9 @@ const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048; ...@@ -47,8 +47,9 @@ const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), periodicExceptions(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false), CpuNonbondedForce::CpuNonbondedForce(const CpuNeighborList& neighbors) : neighborList(&neighbors), cutoff(false), useSwitch(false), periodic(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) { periodicExceptions(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false), cutoffDistance(0.0f),
alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
} }
CpuNonbondedForce::~CpuNonbondedForce() { CpuNonbondedForce::~CpuNonbondedForce() {
...@@ -59,18 +60,16 @@ CpuNonbondedForce::~CpuNonbondedForce() { ...@@ -59,18 +60,16 @@ CpuNonbondedForce::~CpuNonbondedForce() {
Set the force to use a cutoff. Set the force to use a cutoff.
@param distance the cutoff distance @param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent @param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neighbors, float solventDielectric) { void CpuNonbondedForce::setUseCutoff(float distance, float solventDielectric) {
if (distance != cutoffDistance) if (distance != cutoffDistance)
tableIsValid = false; tableIsValid = false;
cutoff = true; cutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6); inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0); krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0); crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
if(alphaDispersionEwald != 0.0f){ if(alphaDispersionEwald != 0.0f){
...@@ -486,7 +485,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -486,7 +485,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
} }
} }
} }
else if (cutoff) { else {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
while (true) { while (true) {
...@@ -496,72 +495,6 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -496,72 +495,6 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
calculateBlockIxn(nextBlock, forces, energyPtr, boxSize, invBoxSize); calculateBlockIxn(nextBlock, forces, energyPtr, boxSize, invBoxSize);
} }
} }
else {
// Loop over all atom pairs
while (true) {
int i = atomicCounter++;
if (i >= numberOfAtoms)
break;
for (int j = i+1; j < numberOfAtoms; j++)
if (exclusions[j].find(i) == exclusions[j].end())
calculateOneIxn(i, j, forces, energyPtr, boxSize, invBoxSize);
}
}
}
void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// get deltaR, R2, and R between 2 atoms
fvec4 deltaR;
fvec4 posI(posq+4*ii);
fvec4 posJ(posq+4*jj);
float r2;
getDeltaR(posJ, posI, deltaR, r2, periodic, boxSize, invBoxSize);
if (cutoff && r2 >= cutoffDistance*cutoffDistance)
return;
float r = sqrtf(r2);
float inverseR = 1/r;
float switchValue = 1, switchDeriv = 0;
if (useSwitch && r > switchingDistance) {
float t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
switchValue = 1+t*t*t*(-10+t*(15-t*6));
switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
}
float sig = atomParameters[ii].first + atomParameters[jj].first;
float sig2 = inverseR*sig;
sig2 *= sig2;
float sig6 = sig2*sig2*sig2;
float eps = atomParameters[ii].second*atomParameters[jj].second;
float dEdR = switchValue*eps*(12.0f*sig6 - 6.0f)*sig6;
float chargeProd = ONE_4PI_EPS0*posq[4*ii+3]*posq[4*jj+3];
if (cutoff)
dEdR += (float) (chargeProd*(inverseR-2.0f*krf*r2));
else
dEdR += (float) (chargeProd*inverseR);
dEdR *= inverseR*inverseR;
float energy = eps*(sig6-1.0f)*sig6;
if (useSwitch) {
dEdR -= energy*switchDeriv*inverseR;
energy *= switchValue;
}
// accumulate energies
if (totalEnergy) {
if (cutoff)
energy += (float) (chargeProd*(inverseR+krf*r2-crf));
else
energy += (float) (chargeProd*inverseR);
*totalEnergy += energy;
}
// accumulate forces
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj);
} }
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
......
...@@ -23,17 +23,18 @@ ...@@ -23,17 +23,18 @@
*/ */
#include "CpuNonbondedForceFvec.h" #include "CpuNonbondedForceFvec.h"
#include "CpuNeighborList.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#ifdef __AVX__ #ifdef __AVX__
#include "openmm/internal/vectorizeAvx.h" #include "openmm/internal/vectorizeAvx.h"
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() { OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx(const OpenMM::CpuNeighborList& neighbors) {
return new OpenMM::CpuNonbondedForceFvec<fvec8>(); return new OpenMM::CpuNonbondedForceFvec<fvec8>(neighbors);
} }
#else #else
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() { OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx(const OpenMM::CpuNeighborList& neighbors) {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support"); throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support");
} }
#endif #endif
...@@ -23,13 +23,14 @@ ...@@ -23,13 +23,14 @@
*/ */
#include "CpuNonbondedForceFvec.h" #include "CpuNonbondedForceFvec.h"
#include "CpuNeighborList.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#ifdef __AVX2__ #ifdef __AVX2__
#include "openmm/internal/vectorizeAvx2.h" #include "openmm/internal/vectorizeAvx2.h"
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2() { OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2(const OpenMM::CpuNeighborList& neighbors) {
return new OpenMM::CpuNonbondedForceFvec<fvecAvx2>(); return new OpenMM::CpuNonbondedForceFvec<fvecAvx2>(neighbors);
} }
#else #else
...@@ -38,7 +39,7 @@ bool isAvx2Supported() { ...@@ -38,7 +39,7 @@ bool isAvx2Supported() {
return false; return false;
} }
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2() { OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2(const OpenMM::CpuNeighborList& neighbors) {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX2 support"); throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX2 support");
} }
#endif #endif
...@@ -23,21 +23,24 @@ ...@@ -23,21 +23,24 @@
*/ */
#include "CpuNonbondedForceFvec.h" #include "CpuNonbondedForceFvec.h"
#include "CpuNeighborList.h"
#include "openmm/internal/hardware.h" #include "openmm/internal/hardware.h"
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4(); using namespace OpenMM;
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2(); CpuNonbondedForce* createCpuNonbondedForceVec4(const CpuNeighborList& neighbors);
CpuNonbondedForce* createCpuNonbondedForceAvx(const CpuNeighborList& neighbors);
CpuNonbondedForce* createCpuNonbondedForceAvx2(const CpuNeighborList& neighbors);
bool isAvx2Supported(); bool isAvx2Supported();
#include <iostream> #include <iostream>
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec() { CpuNonbondedForce* createCpuNonbondedForceVec(const CpuNeighborList& neighbors) {
if (isAvx2Supported()) if (isAvx2Supported())
return createCpuNonbondedForceAvx2(); return createCpuNonbondedForceAvx2(neighbors);
else if (isAvxSupported()) else if (isAvxSupported())
return createCpuNonbondedForceAvx(); return createCpuNonbondedForceAvx(neighbors);
else else
return createCpuNonbondedForceVec4(); return createCpuNonbondedForceVec4(neighbors);
} }
...@@ -23,10 +23,11 @@ ...@@ -23,10 +23,11 @@
*/ */
#include "CpuNonbondedForceFvec.h" #include "CpuNonbondedForceFvec.h"
#include "CpuNeighborList.h"
// Very minimal file. It exists purely to be able to compile it in SIMD-4. // Very minimal file. It exists purely to be able to compile it in SIMD-4.
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4() { OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4(const OpenMM::CpuNeighborList& neighbors) {
return new OpenMM::CpuNonbondedForceFvec<fvec4>(); return new OpenMM::CpuNonbondedForceFvec<fvec4>(neighbors);
} }
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2019 Stanford University and the Authors. * * Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -148,7 +148,8 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl& ...@@ -148,7 +148,8 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl&
} }
CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads, bool deterministicForces) : posq(4*numParticles), threads(numThreads), CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads, bool deterministicForces) : posq(4*numParticles), threads(numThreads),
deterministicForces(deterministicForces), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false), currentPosqIndex(-1), nextPosqIndex(0) { deterministicForces(deterministicForces), numParticles(numParticles), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false),
currentPosqIndex(-1), nextPosqIndex(0) {
numThreads = threads.getNumThreads(); numThreads = threads.getNumThreads();
threadForce.resize(numThreads); threadForce.resize(numThreads);
for (int i = 0; i < numThreads; i++) for (int i = 0; i < numThreads; i++)
...@@ -166,8 +167,13 @@ CpuPlatform::PlatformData::~PlatformData() { ...@@ -166,8 +167,13 @@ CpuPlatform::PlatformData::~PlatformData() {
} }
void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const vector<set<int> >& exclusionList) { void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const vector<set<int> >& exclusionList) {
if (neighborList == NULL) if (neighborList == NULL) {
neighborList = new CpuNeighborList(getVectorWidth()); neighborList = new CpuNeighborList(getVectorWidth());
if (cutoffDistance == 0.0)
neighborList->createDenseNeighborList(numParticles, exclusionList);
}
else if ((cutoffDistance == 0.0) != (cutoff == 0.0))
throw OpenMMException("All nonbonded Forces must agree on whether to apply a cutoff");
if (cutoffDistance > cutoff) if (cutoffDistance > cutoff)
cutoff = cutoffDistance; cutoff = cutoffDistance;
if (cutoffDistance+padding > paddedCutoff) if (cutoffDistance+padding > paddedCutoff)
......
...@@ -252,8 +252,7 @@ int ReferenceDiscrete1DFunction::getNumArguments() const { ...@@ -252,8 +252,7 @@ int ReferenceDiscrete1DFunction::getNumArguments() const {
double ReferenceDiscrete1DFunction::evaluate(const double* arguments) const { double ReferenceDiscrete1DFunction::evaluate(const double* arguments) const {
int i = (int) round(arguments[0]); int i = (int) round(arguments[0]);
if (i < 0 || i >= values.size()) i = max(0, min((int) values.size()-1, i));
throw OpenMMException("ReferenceDiscrete1DFunction: argument out of range");
return values[i]; return values[i];
} }
...@@ -276,8 +275,8 @@ int ReferenceDiscrete2DFunction::getNumArguments() const { ...@@ -276,8 +275,8 @@ int ReferenceDiscrete2DFunction::getNumArguments() const {
double ReferenceDiscrete2DFunction::evaluate(const double* arguments) const { double ReferenceDiscrete2DFunction::evaluate(const double* arguments) const {
int i = (int) round(arguments[0]); int i = (int) round(arguments[0]);
int j = (int) round(arguments[1]); int j = (int) round(arguments[1]);
if (i < 0 || i >= xsize || j < 0 || j >= ysize) i = max(0, min(xsize-1, i));
throw OpenMMException("ReferenceDiscrete2DFunction: argument out of range"); j = max(0, min(ysize-1, j));
return values[i+j*xsize]; return values[i+j*xsize];
} }
...@@ -301,8 +300,9 @@ double ReferenceDiscrete3DFunction::evaluate(const double* arguments) const { ...@@ -301,8 +300,9 @@ double ReferenceDiscrete3DFunction::evaluate(const double* arguments) const {
int i = (int) round(arguments[0]); int i = (int) round(arguments[0]);
int j = (int) round(arguments[1]); int j = (int) round(arguments[1]);
int k = (int) round(arguments[2]); int k = (int) round(arguments[2]);
if (i < 0 || i >= xsize || j < 0 || j >= ysize || k < 0 || k >= zsize) i = max(0, min(xsize-1, i));
throw OpenMMException("ReferenceDiscrete3DFunction: argument out of range"); j = max(0, min(ysize-1, j));
k = max(0, min(zsize-1, k));
return values[i+(j+k*ysize)*xsize]; return values[i+(j+k*ysize)*xsize];
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment