"platforms/common/vscode:/vscode.git/clone" did not exist on "add2bbeee88933f4ffe9066fb8a41fce561c8086"
Unverified Commit db4cefd4 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Vectorize nonbonded interactions with no cutoff (#3575)

* Vectorize NonbondedForce with no cutoff

* Vectorize CustomNonbondedForce with no cutoff

* Memory efficient dense neighbor list

* Fixed errors
parent 11a1982a
......@@ -48,7 +48,7 @@ public:
--------------------------------------------------------------------------------------- */
CpuCustomNonbondedForce(ThreadPool& threads);
CpuCustomNonbondedForce(ThreadPool& threads, const CpuNeighborList& neighbors);
void initialize(const Lepton::ParsedExpression& energyExpression, const Lepton::ParsedExpression& forceExpression,
const std::vector<std::string>& parameterNames, const std::vector<std::set<int> >& exclusions,
......@@ -68,11 +68,10 @@ public:
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
--------------------------------------------------------------------------------------- */
void setUseCutoff(double distance, const CpuNeighborList& neighbors);
void setUseCutoff(double distance);
/**---------------------------------------------------------------------------------------
......@@ -212,7 +211,7 @@ public:
/**
* This function is called to create an instance of an appropriate subclass for the current CPU.
*/
CpuCustomNonbondedForce* createCpuCustomNonbondedForce(ThreadPool& threads);
CpuCustomNonbondedForce* createCpuCustomNonbondedForce(ThreadPool& threads, const CpuNeighborList& neighbors);
} // namespace OpenMM
......
......@@ -29,12 +29,12 @@
namespace OpenMM {
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
enum PeriodicType {NoCutoff, NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
template <typename FVEC, int BLOCK_SIZE>
class CpuCustomNonbondedForceFvec : public CpuCustomNonbondedForce {
public:
CpuCustomNonbondedForceFvec(ThreadPool& threads) : CpuCustomNonbondedForce(threads) {
CpuCustomNonbondedForceFvec(ThreadPool& threads, const CpuNeighborList& neighbors) : CpuCustomNonbondedForce(threads, neighbors) {
}
protected:
......@@ -101,7 +101,9 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxn(ThreadData
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
if (!cutoff)
calculateBlockIxnImpl<NoCutoff>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
......@@ -137,13 +139,12 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxnImpl(Thread
// Loop over neighbors for this block.
const auto& neighbors = neighborList->getBlockNeighbors(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
CpuNeighborList::NeighborIterator neighbors = neighborList->getNeighborIterator(blockIndex);
FVEC partialEnergy = {};
for (int i = 0; i < neighbors.size(); i++) {
while (neighbors.next()) {
// Load the next neighbor.
int atom = neighbors[i];
int atom = neighbors.getNeighbor();
for (int j = 0; j < numParams; j++)
for (int k = 0; k < BLOCK_SIZE; k++)
data.vecParticle2Params[j*BLOCK_SIZE+k] = atomParameters[atom][j];
......@@ -158,8 +159,9 @@ void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxnImpl(Thread
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize);
const auto exclNotMask = FVEC::expandBitsToMask(~exclusions[i]);
const auto include = blendZero(r2 < cutoffDistanceSquared, exclNotMask);
auto include = FVEC::expandBitsToMask(~neighbors.getExclusions());
if (PERIODIC_TYPE != NoCutoff)
include = blendZero(r2 < cutoffDistanceSquared, include);
if (!any(include))
continue; // No interactions to compute.
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -46,11 +46,34 @@ namespace OpenMM {
class OPENMM_EXPORT_CPU CpuNeighborList {
public:
class Voxels;
class NeighborIterator;
CpuNeighborList(int blockSize);
/**
* Compute the neighbor list based on the current positions of atoms.
*
* @param numAtoms the number of atoms in the system
* @param atomLocations the positions of the atoms
* @param exclusions exclusions[i] contains the indices of all atoms with which atom i should not interact
* @param periodicBoxVectors the current periodic box vectors
* @param usePeriodic whether to apply periodic boundary conditions
* @param maxDistance the neighbor list will contain all pairs that are within this distance of each other
* @param threads used for parallelization
*/
void computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const std::vector<std::set<int> >& exclusions,
const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads);
/**
* Build a dense neighbor list, in which every atom interacts with every other (except exclusions), regardless of distance.
*
* @param numAtoms the number of atoms in the system
* @param exclusions exclusions[i] contains the indices of all atoms with which atom i should not interact
*/
void createDenseNeighborList(int numAtoms, const std::vector<std::set<int> >& exclusions);
int getNumBlocks() const;
int getBlockSize() const;
/**
* Get an object for iterating over the neighbors of an atom block.
*/
NeighborIterator getNeighborIterator(int blockIndex) const;
const std::vector<int32_t>& getSortedAtoms() const;
const std::vector<int>& getBlockNeighbors(int blockIndex) const;
......@@ -71,7 +94,7 @@ private:
int blockSize;
std::vector<int> sortedAtoms;
std::vector<float> sortedPositions;
std::vector<std::vector<int> > blockNeighbors;
std::vector<std::vector<int> > blockNeighbors, blockExclusionIndices;
std::vector<std::vector<BlockExclusionMask> > blockExclusions;
// The following variables are used to make information accessible to the individual threads.
float minx, maxx, miny, maxy, minz, maxz;
......@@ -81,11 +104,46 @@ private:
const float* atomLocations;
Vec3 periodicBoxVectors[3];
int numAtoms;
bool usePeriodic;
bool usePeriodic, dense;
float maxDistance;
std::atomic<int> atomicCounter;
};
class OPENMM_EXPORT_CPU CpuNeighborList::NeighborIterator {
public:
/**
* This constructor is used for standard neighbor lists. Do not call it directly. Obtain a
* NeighborIterator by calling getNeighborIterator() on a neighbor list.
*/
NeighborIterator(const std::vector<int>& neighbors, const std::vector<BlockExclusionMask>& exclusions);
/**
* This constructor is used for dense neighbor lists. Do not call it directly. Obtain a
* NeighborIterator by calling getNeighborIterator() on a neighbor list.
*/
NeighborIterator(int firstAtom, int lastAtom, const std::vector<int>& exclusionIndices, const std::vector<BlockExclusionMask>& exclusions);
/**
* Advance the iterator to the next neighbor.
*
* @return false if there are no more neighbors, true otherwise.
*/
bool next();
/**
* Get the index of the current neighbor.
*/
int getNeighbor() const;
/**
* Get bit flags marking which atoms in the block the current atom is excluded from interacting with.
*/
BlockExclusionMask getExclusions() const;
private:
bool dense;
int currentAtom, currentIndex, lastAtom;
BlockExclusionMask currentExclusions;
const std::vector<int>* neighbors;
const std::vector<int>* exclusionIndices;
const std::vector<BlockExclusionMask>* exclusions;
};
} // namespace OpenMM
#endif // OPENMM_CPU_NEIGHBORLIST_H_
......@@ -44,10 +44,12 @@ class CpuNonbondedForce {
/**---------------------------------------------------------------------------------------
Constructor
@param neighbors the neighbor list to use
--------------------------------------------------------------------------------------- */
CpuNonbondedForce();
CpuNonbondedForce(const CpuNeighborList& neighbors);
/**
* Virtual destructor.
......@@ -60,12 +62,11 @@ class CpuNonbondedForce {
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void setUseCutoff(float distance, const CpuNeighborList& neighbors, float solventDielectric);
void setUseCutoff(float distance, float solventDielectric);
/**---------------------------------------------------------------------------------------
......@@ -213,19 +214,6 @@ protected:
static const float TWO_OVER_SQRT_PI;
static const int NUM_TABLE_POINTS;
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn between two atoms
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void calculateOneIxn(int atom1, int atom2, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
/**---------------------------------------------------------------------------------------
......
......@@ -36,7 +36,7 @@
namespace OpenMM {
enum BlockType {EWALD, NON_EWALD}; // :TODO: Better name for non-ewald.
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
enum PeriodicType {NoCutoff, NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
/**
* Generic SIMD implementation of CpuNonbondedForce. The templating allows the same
......@@ -50,6 +50,10 @@ public:
* Store how many elements are contained in each block of atoms.
*/
static constexpr int blockSize = sizeof(FVEC) / sizeof(float);
/**
* Constructor.
*/
CpuNonbondedForceFvec(const CpuNeighborList& neighbors);
protected:
/**---------------------------------------------------------------------------------------
......@@ -95,6 +99,10 @@ protected:
};
template <typename FVEC>
CpuNonbondedForceFvec<FVEC>::CpuNonbondedForceFvec(const CpuNeighborList& neighbors) : CpuNonbondedForce(neighbors) {
}
/**
* Use a table lookup to approximate a function specific function.
*/
......@@ -166,7 +174,9 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnHandler(int blockIndex, float
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
if (!cutoff)
calculateBlockIxnImpl<NoCutoff, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom, BLOCK_TYPE>(blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
......@@ -181,7 +191,7 @@ template <int PERIODIC_TYPE, BlockType BLOCK_TYPE>
void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize * blockIndex];
const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
fvec4 blockAtomPosq[blockSize];
FVEC blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
FVEC blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
......@@ -198,8 +208,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
// the cycles are spent anyway.
FVEC blockAtomSigma = {};
FVEC blockAtomEpsilon = {};
for (int i=0; i<blockSize; ++i)
{
for (int i = 0; i < blockSize; ++i) {
((float*)&blockAtomSigma)[i] = atomParameters[blockAtom[i]].first;
((float*)&blockAtomEpsilon)[i] = atomParameters[blockAtom[i]].second;
}
......@@ -211,25 +220,23 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance;
// Loop over neighbors for this block.
const auto& neighbors = neighborList->getBlockNeighbors(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
CpuNeighborList::NeighborIterator neighbors = neighborList->getNeighborIterator(blockIndex);
FVEC partialEnergy = {};
for (int i = 0; i < (int) neighbors.size(); i++) {
while (neighbors.next()) {
// Load the next neighbor.
int atom = neighbors[i];
int atom = neighbors.getNeighbor();
// Compute the distances to the block atoms.
FVEC dx, dy, dz, r2;
fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize);
const auto exclNotMask = FVEC::expandBitsToMask(~exclusions[i]);
const auto include = blendZero(r2 < cutoffDistanceSquared, exclNotMask);
auto include = FVEC::expandBitsToMask(~neighbors.getExclusions());
if (PERIODIC_TYPE != NoCutoff)
include = blendZero(r2 < cutoffDistanceSquared, include);
if (!any(include))
continue; // No interactions to compute.
......@@ -270,12 +277,10 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
dEdR = 0.0f;
}
const auto chargeProd = blockAtomCharge*posq[4*atom+3];
if (BLOCK_TYPE == BlockType::EWALD)
{
if (BLOCK_TYPE == BlockType::EWALD) {
dEdR += chargeProd*inverseR*approximateFunctionFromTable(ewaldScaleTable, r, FVEC(ewaldDXInv));
}
else
{
else {
if (cutoff)
dEdR += chargeProd*(inverseR-2.0f*krf*r2);
else
......@@ -287,8 +292,7 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
if (totalEnergy) {
if (BLOCK_TYPE == BlockType::EWALD)
energy += chargeProd*inverseR*approximateFunctionFromTable(erfcTable, alphaEwald*r, FVEC(erfcDXInv));
else // Non-ewald.
{
else { // Non-ewald.
if (cutoff)
energy += chargeProd*(inverseR+krf*r2-crf);
else
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -91,6 +91,18 @@ class CpuPlatform::PlatformData {
public:
PlatformData(int numParticles, int numThreads, bool deterministicForces);
~PlatformData();
/**
* Request that a neighbor list be built and maintained.
*
* @param cutoffDistance the cutoff distance for particle pairs to be included in the neighbor list.
* If this is 0, a dense neighbor list is built that includes all particle pairs
* regardless of distance.
* @param padding a padding distance that should be added to the cutoff so the neighbor list
* does not need to be rebuilt every step
* @param useExclusions whether to omit specific excluded interactions
* @param exclusionList if useExclusions is true, exclusionList[i] should contain the indices of all
* particles with which particle i should not interact
*/
void requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const std::vector<std::set<int> >& exclusionList);
int requestPosqIndex();
AlignedArray<float> posq;
......@@ -99,6 +111,7 @@ public:
bool isPeriodic;
CpuRandom random;
std::map<std::string, std::string> propertyValues;
int numParticles;
CpuNeighborList* neighborList;
double cutoff, paddedCutoff;
bool anyExclusions, deterministicForces;
......
......@@ -94,7 +94,8 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const CompiledExpression& energy
}
}
CpuCustomNonbondedForce::CpuCustomNonbondedForce(ThreadPool& threads) : cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), threads(threads) {
CpuCustomNonbondedForce::CpuCustomNonbondedForce(ThreadPool& threads, const CpuNeighborList& neighbors) : cutoff(false), useSwitch(false),
periodic(false), useInteractionGroups(false), threads(threads), neighborList(&neighbors) {
}
void CpuCustomNonbondedForce::initialize(const ParsedExpression& energyExpression,
......@@ -123,10 +124,9 @@ CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
delete data;
}
void CpuCustomNonbondedForce::setUseCutoff(double distance, const CpuNeighborList& neighbors) {
void CpuCustomNonbondedForce::setUseCutoff(double distance) {
cutoff = true;
cutoffDistance = distance;
neighborList = &neighbors;
}
void CpuCustomNonbondedForce::setInteractionGroups(const vector<pair<set<int>, set<int> > >& groups) {
......@@ -280,28 +280,28 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
calculateOneIxn(atom1, atom2, data, forces, energy, boxSize, invBoxSize);
}
}
else if (cutoff) {
// We are using a cutoff, so get the interactions from the neighbor list.
else {
// Get the interactions from the neighbor list.
while (true) {
int blockIndex = atomicCounter++;
if (blockIndex >= neighborList->getNumBlocks())
break;
const int blockSize = neighborList->getBlockSize();
const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
if (data.energyParamDerivs.size() == 0)
calculateBlockIxn(data, blockIndex, forces, energy, boxSize, invBoxSize);
else {
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
const int blockSize = neighborList->getBlockSize();
const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
CpuNeighborList::NeighborIterator neighbors = neighborList->getNeighborIterator(blockIndex);
while (neighbors.next()) {
int first = neighbors.getNeighbor();
for (int j = 0; j < paramNames.size(); j++)
data.particleParam[j*2] = atomParameters[first][j];
for (int j = 0; j < computedValueNames.size(); j++)
data.computedValues[j*2] = atomComputedValues[j][first];
auto exclusions = neighbors.getExclusions();
for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) {
if ((exclusions & (1<<k)) == 0) {
int second = blockAtom[k];
for (int j = 0; j < paramNames.size(); j++)
data.particleParam[j*2+1] = atomParameters[second][j];
......@@ -314,28 +314,6 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
}
}
}
else {
// Every particle interacts with every other one.
while (true) {
int ii = atomicCounter++;
if (ii >= numberOfAtoms)
break;
for (int jj = ii+1; jj < numberOfAtoms; jj++) {
if (exclusions[jj].find(ii) == exclusions[jj].end()) {
for (int j = 0; j < paramNames.size(); j++) {
data.particleParam[j*2] = atomParameters[ii][j];
data.particleParam[j*2+1] = atomParameters[jj][j];
}
for (int j = 0; j < computedValueNames.size(); j++) {
data.computedValues[j*2] = atomComputedValues[j][ii];
data.computedValues[j*2+1] = atomComputedValues[j][jj];
}
calculateOneIxn(ii, jj, data, forces, energy, boxSize, invBoxSize);
}
}
}
}
}
void CpuCustomNonbondedForce::calculateOneIxn(int ii, int jj, ThreadData& data,
......
......@@ -25,15 +25,17 @@
#include "CpuCustomNonbondedForceFvec.h"
#include "openmm/OpenMMException.h"
using namespace OpenMM;
#ifdef __AVX__
#include "openmm/internal/vectorizeAvx.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads) {
return new OpenMM::CpuCustomNonbondedForceFvec<fvec8, 8>(threads);
CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(ThreadPool& threads, const CpuNeighborList& neighbors) {
return new CpuCustomNonbondedForceFvec<fvec8, 8>(threads, neighbors);
}
#else
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads) {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support");
CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(ThreadPool& threads, const CpuNeighborList& neighbors) {
throw OpenMMException("Internal error: OpenMM was compiled without AVX support");
}
#endif
......@@ -25,12 +25,14 @@
#include "CpuCustomNonbondedForceFvec.h"
#include "openmm/internal/hardware.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(OpenMM::ThreadPool& threads);
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads);
using namespace OpenMM;
OpenMM::CpuCustomNonbondedForce* OpenMM::createCpuCustomNonbondedForce(OpenMM::ThreadPool& threads) {
CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(ThreadPool& threads, const CpuNeighborList& neighbors);
CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(ThreadPool& threads, const CpuNeighborList& neighbors);
CpuCustomNonbondedForce* OpenMM::createCpuCustomNonbondedForce(ThreadPool& threads, const CpuNeighborList& neighbors) {
if (isAvxSupported())
return createCpuCustomNonbondedForceAvx(threads);
return createCpuCustomNonbondedForceAvx(threads, neighbors);
else
return createCpuCustomNonbondedForceVec4(threads);
return createCpuCustomNonbondedForceVec4(threads, neighbors);
}
......@@ -23,6 +23,8 @@
#include "CpuCustomNonbondedForceFvec.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(OpenMM::ThreadPool& threads) {
return new OpenMM::CpuCustomNonbondedForceFvec<fvec4, 4>(threads);
using namespace OpenMM;
CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(ThreadPool& threads, const CpuNeighborList& neighbors) {
return new CpuCustomNonbondedForceFvec<fvec4, 4>(threads, neighbors);
}
......@@ -76,9 +76,15 @@ CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) {
particleExclusions[e.particle2].insert(e.particle1);
}
nonbondedMethod = force.getNonbondedMethod();
cutoffDistance = force.getCutoffDistance();
switchingDistance = force.getSwitchingDistance();
useSwitchingFunction = force.getUseSwitchingFunction();
if (nonbondedMethod == GayBerneForce::NoCutoff) {
cutoffDistance = 0.0;
useSwitchingFunction = false;
}
else {
cutoffDistance = force.getCutoffDistance();
switchingDistance = force.getSwitchingDistance();
useSwitchingFunction = force.getUseSwitchingFunction();
}
// Allocate workspace for calculations.
......@@ -157,7 +163,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
// Compute this thread's subset of interactions.
if (neighborList == NULL) {
if (cutoffDistance == 0.0) {
while (true) {
int i = atomicCounter++;
if (i >= numParticles)
......
......@@ -233,7 +233,7 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
// Determine whether we need to recompute the neighbor list.
if (data.neighborList != NULL) {
if (data.neighborList != NULL && data.cutoff > 0.0) {
double padding = data.paddedCutoff-data.cutoff;;
bool needRecompute = false;
double closeCutoff2 = 0.25*padding*padding;
......@@ -474,11 +474,10 @@ private:
int numParticles;
};
CpuNonbondedForce* createCpuNonbondedForceVec();
CpuNonbondedForce* createCpuNonbondedForceVec(const CpuNeighborList& neighbors);
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
nonbonded = createCpuNonbondedForceVec();
}
CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
......@@ -581,8 +580,10 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedCutoff = force.getCutoffDistance();
if (nonbondedMethod == NoCutoff)
if (nonbondedMethod == NoCutoff) {
data.requestNeighborList(0.0, 0.0, true, exclusions);
useSwitchingFunction = false;
}
else {
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions);
useSwitchingFunction = force.getUseSwitchingFunction();
......@@ -616,6 +617,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
else
dispersionCoefficient = 0.0;
data.isPeriodic |= (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
nonbonded = createCpuNonbondedForceVec(*data.neighborList);
}
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
......@@ -661,7 +663,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
nonbonded->setUseCutoff(nonbondedCutoff, rfDielectric);
if (data.isPeriodic) {
Vec3* boxVectors = extractBoxVectors(context);
double minAllowedSize = 1.999999*nonbondedCutoff;
......@@ -876,8 +878,10 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
force.getParticleParameters(i, particleParamArray[i]);
nonbondedMethod = CalcCustomNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedCutoff = force.getCutoffDistance();
if (nonbondedMethod == NoCutoff)
if (nonbondedMethod == NoCutoff) {
data.requestNeighborList(0.0, 0.0, true, exclusions);
useSwitchingFunction = false;
}
else {
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions);
useSwitchingFunction = force.getUseSwitchingFunction();
......@@ -968,7 +972,7 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF
// Create the object that computes the interaction.
nonbonded = createCpuCustomNonbondedForce(data.threads);
nonbonded = createCpuCustomNonbondedForce(data.threads, *data.neighborList);
nonbonded->initialize(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions,
computedValueNames, computedValueExpressions);
if (interactionGroups.size() > 0)
......@@ -982,7 +986,7 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc
double energy = 0;
bool periodic = (nonbondedMethod == CutoffPeriodic);
if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList);
nonbonded->setUseCutoff(nonbondedCutoff);
if (periodic) {
double minAllowedSize = 2*nonbondedCutoff;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2018 Stanford University and the Authors. *
* Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -418,6 +418,7 @@ CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
const Vec3* periodicBoxVectors, bool usePeriodic, float maxDistance, ThreadPool& threads) {
dense = false;
int numBlocks = (numAtoms+blockSize-1)/blockSize;
blockNeighbors.resize(numBlocks);
blockExclusions.resize(numBlocks);
......@@ -497,6 +498,58 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
}
}
void CpuNeighborList::createDenseNeighborList(int numAtoms, const vector<set<int> >& exclusions) {
dense = true;
this->numAtoms = numAtoms;
int numBlocks = (numAtoms+blockSize-1)/blockSize;
blockExclusionIndices.resize(numBlocks);
blockExclusions.resize(numBlocks);
sortedAtoms.resize(numAtoms);
for (int i = 0; i < numAtoms; i++)
sortedAtoms[i] = i;
for (int i = 0; i < numBlocks; i++) {
// Build the list of exclusions for this block.
int firstIndex = blockSize*i;
int atomsInBlock = min(blockSize, numAtoms-firstIndex);
map<int, int> exclusionMap;
for (int j = 0; j < atomsInBlock; j++) {
exclusionMap[firstIndex+j] = (1<<(j+1))-1;
}
for (int j = 0; j < atomsInBlock; j++) {
const set<int>& atomExclusions = exclusions[firstIndex+j];
const BlockExclusionMask mask = 1<<j;
for (int exclusion : atomExclusions) {
if (firstIndex <= exclusion) {
auto thisAtomFlags = exclusionMap.find(exclusion);
if (thisAtomFlags == exclusionMap.end())
exclusionMap[exclusion] = mask;
else
thisAtomFlags->second |= mask;
}
}
}
blockExclusionIndices[i].clear();
blockExclusions[i].clear();
for (auto flags : exclusionMap) {
blockExclusionIndices[i].push_back(flags.first);
blockExclusions[i].push_back(flags.second);
}
}
// Add padding atoms to fill up the last block.
int numPadding = numBlocks*blockSize-numAtoms;
if (numPadding > 0) {
const BlockExclusionMask mask = (~0) << (blockSize - numPadding);
for (int i = 0; i < numPadding; i++)
sortedAtoms.push_back(0);
auto& exc = blockExclusions.back();
for (int i = 0; i < (int) exc.size(); i++)
exc[i] |= mask;
}
}
int CpuNeighborList::getNumBlocks() const {
return sortedAtoms.size()/blockSize;
}
......@@ -518,6 +571,13 @@ const std::vector<CpuNeighborList::BlockExclusionMask>& CpuNeighborList::getBloc
}
CpuNeighborList::NeighborIterator CpuNeighborList::getNeighborIterator(int blockIndex) const {
if (dense)
return NeighborIterator(blockIndex*blockSize, numAtoms, blockExclusionIndices[blockIndex], blockExclusions[blockIndex]);
else
return NeighborIterator(blockNeighbors[blockIndex], blockExclusions[blockIndex]);
}
void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadIndex) {
// Compute the positions of atoms along the Hilbert curve.
......@@ -599,4 +659,40 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
}
}
CpuNeighborList::NeighborIterator::NeighborIterator(const vector<int>& neighbors, const vector<BlockExclusionMask>& exclusions) :
dense(false), neighbors(&neighbors), exclusions(&exclusions), currentIndex(-1) {
}
CpuNeighborList::NeighborIterator::NeighborIterator(int firstAtom, int lastAtom, const vector<int>& exclusionIndices, const vector<BlockExclusionMask>& exclusions) :
dense(true), currentAtom(firstAtom-1), lastAtom(lastAtom), exclusionIndices(&exclusionIndices), exclusions(&exclusions), currentIndex(0) {
}
bool CpuNeighborList::NeighborIterator::next() {
if (dense) {
if (++currentAtom >= lastAtom)
return false;
if (currentIndex < exclusionIndices->size() && (*exclusionIndices)[currentIndex] == currentAtom)
currentExclusions = (*exclusions)[currentIndex++];
else
currentExclusions = 0;
return true;
}
else {
if (++currentIndex < neighbors->size()) {
currentAtom = (*neighbors)[currentIndex];
currentExclusions = (*exclusions)[currentIndex];
return true;
}
return false;
}
}
int CpuNeighborList::NeighborIterator::getNeighbor() const {
return currentAtom;
}
CpuNeighborList::BlockExclusionMask CpuNeighborList::NeighborIterator::getExclusions() const {
return currentExclusions;
}
} // namespace OpenMM
......@@ -47,8 +47,9 @@ const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
--------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), periodicExceptions(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
CpuNonbondedForce::CpuNonbondedForce(const CpuNeighborList& neighbors) : neighborList(&neighbors), cutoff(false), useSwitch(false), periodic(false),
periodicExceptions(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false), cutoffDistance(0.0f),
alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
}
CpuNonbondedForce::~CpuNonbondedForce() {
......@@ -59,18 +60,16 @@ CpuNonbondedForce::~CpuNonbondedForce() {
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neighbors, float solventDielectric) {
void CpuNonbondedForce::setUseCutoff(float distance, float solventDielectric) {
if (distance != cutoffDistance)
tableIsValid = false;
cutoff = true;
cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
if(alphaDispersionEwald != 0.0f){
......@@ -486,7 +485,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
}
}
}
else if (cutoff) {
else {
// Compute the interactions from the neighbor list.
while (true) {
......@@ -496,72 +495,6 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
calculateBlockIxn(nextBlock, forces, energyPtr, boxSize, invBoxSize);
}
}
else {
// Loop over all atom pairs
while (true) {
int i = atomicCounter++;
if (i >= numberOfAtoms)
break;
for (int j = i+1; j < numberOfAtoms; j++)
if (exclusions[j].find(i) == exclusions[j].end())
calculateOneIxn(i, j, forces, energyPtr, boxSize, invBoxSize);
}
}
}
void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// get deltaR, R2, and R between 2 atoms
fvec4 deltaR;
fvec4 posI(posq+4*ii);
fvec4 posJ(posq+4*jj);
float r2;
getDeltaR(posJ, posI, deltaR, r2, periodic, boxSize, invBoxSize);
if (cutoff && r2 >= cutoffDistance*cutoffDistance)
return;
float r = sqrtf(r2);
float inverseR = 1/r;
float switchValue = 1, switchDeriv = 0;
if (useSwitch && r > switchingDistance) {
float t = (r-switchingDistance)/(cutoffDistance-switchingDistance);
switchValue = 1+t*t*t*(-10+t*(15-t*6));
switchDeriv = t*t*(-30+t*(60-t*30))/(cutoffDistance-switchingDistance);
}
float sig = atomParameters[ii].first + atomParameters[jj].first;
float sig2 = inverseR*sig;
sig2 *= sig2;
float sig6 = sig2*sig2*sig2;
float eps = atomParameters[ii].second*atomParameters[jj].second;
float dEdR = switchValue*eps*(12.0f*sig6 - 6.0f)*sig6;
float chargeProd = ONE_4PI_EPS0*posq[4*ii+3]*posq[4*jj+3];
if (cutoff)
dEdR += (float) (chargeProd*(inverseR-2.0f*krf*r2));
else
dEdR += (float) (chargeProd*inverseR);
dEdR *= inverseR*inverseR;
float energy = eps*(sig6-1.0f)*sig6;
if (useSwitch) {
dEdR -= energy*switchDeriv*inverseR;
energy *= switchValue;
}
// accumulate energies
if (totalEnergy) {
if (cutoff)
energy += (float) (chargeProd*(inverseR+krf*r2-crf));
else
energy += (float) (chargeProd*inverseR);
*totalEnergy += energy;
}
// accumulate forces
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj);
}
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
......
......@@ -23,17 +23,18 @@
*/
#include "CpuNonbondedForceFvec.h"
#include "CpuNeighborList.h"
#include "openmm/OpenMMException.h"
#ifdef __AVX__
#include "openmm/internal/vectorizeAvx.h"
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() {
return new OpenMM::CpuNonbondedForceFvec<fvec8>();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx(const OpenMM::CpuNeighborList& neighbors) {
return new OpenMM::CpuNonbondedForceFvec<fvec8>(neighbors);
}
#else
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() {
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx(const OpenMM::CpuNeighborList& neighbors) {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support");
}
#endif
......@@ -23,13 +23,14 @@
*/
#include "CpuNonbondedForceFvec.h"
#include "CpuNeighborList.h"
#include "openmm/OpenMMException.h"
#ifdef __AVX2__
#include "openmm/internal/vectorizeAvx2.h"
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2() {
return new OpenMM::CpuNonbondedForceFvec<fvecAvx2>();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2(const OpenMM::CpuNeighborList& neighbors) {
return new OpenMM::CpuNonbondedForceFvec<fvecAvx2>(neighbors);
}
#else
......@@ -38,7 +39,7 @@ bool isAvx2Supported() {
return false;
}
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2() {
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2(const OpenMM::CpuNeighborList& neighbors) {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX2 support");
}
#endif
......@@ -23,21 +23,24 @@
*/
#include "CpuNonbondedForceFvec.h"
#include "CpuNeighborList.h"
#include "openmm/internal/hardware.h"
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2();
using namespace OpenMM;
CpuNonbondedForce* createCpuNonbondedForceVec4(const CpuNeighborList& neighbors);
CpuNonbondedForce* createCpuNonbondedForceAvx(const CpuNeighborList& neighbors);
CpuNonbondedForce* createCpuNonbondedForceAvx2(const CpuNeighborList& neighbors);
bool isAvx2Supported();
#include <iostream>
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec() {
CpuNonbondedForce* createCpuNonbondedForceVec(const CpuNeighborList& neighbors) {
if (isAvx2Supported())
return createCpuNonbondedForceAvx2();
return createCpuNonbondedForceAvx2(neighbors);
else if (isAvxSupported())
return createCpuNonbondedForceAvx();
return createCpuNonbondedForceAvx(neighbors);
else
return createCpuNonbondedForceVec4();
return createCpuNonbondedForceVec4(neighbors);
}
......@@ -23,10 +23,11 @@
*/
#include "CpuNonbondedForceFvec.h"
#include "CpuNeighborList.h"
// Very minimal file. It exists purely to be able to compile it in SIMD-4.
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4() {
return new OpenMM::CpuNonbondedForceFvec<fvec4>();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4(const OpenMM::CpuNeighborList& neighbors) {
return new OpenMM::CpuNonbondedForceFvec<fvec4>(neighbors);
}
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2019 Stanford University and the Authors. *
* Portions copyright (c) 2013-2022 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -148,7 +148,8 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl&
}
CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads, bool deterministicForces) : posq(4*numParticles), threads(numThreads),
deterministicForces(deterministicForces), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false), currentPosqIndex(-1), nextPosqIndex(0) {
deterministicForces(deterministicForces), numParticles(numParticles), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false),
currentPosqIndex(-1), nextPosqIndex(0) {
numThreads = threads.getNumThreads();
threadForce.resize(numThreads);
for (int i = 0; i < numThreads; i++)
......@@ -166,8 +167,13 @@ CpuPlatform::PlatformData::~PlatformData() {
}
void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const vector<set<int> >& exclusionList) {
if (neighborList == NULL)
if (neighborList == NULL) {
neighborList = new CpuNeighborList(getVectorWidth());
if (cutoffDistance == 0.0)
neighborList->createDenseNeighborList(numParticles, exclusionList);
}
else if ((cutoffDistance == 0.0) != (cutoff == 0.0))
throw OpenMMException("All nonbonded Forces must agree on whether to apply a cutoff");
if (cutoffDistance > cutoff)
cutoff = cutoffDistance;
if (cutoffDistance+padding > paddedCutoff)
......
......@@ -252,8 +252,7 @@ int ReferenceDiscrete1DFunction::getNumArguments() const {
double ReferenceDiscrete1DFunction::evaluate(const double* arguments) const {
int i = (int) round(arguments[0]);
if (i < 0 || i >= values.size())
throw OpenMMException("ReferenceDiscrete1DFunction: argument out of range");
i = max(0, min((int) values.size()-1, i));
return values[i];
}
......@@ -276,8 +275,8 @@ int ReferenceDiscrete2DFunction::getNumArguments() const {
double ReferenceDiscrete2DFunction::evaluate(const double* arguments) const {
int i = (int) round(arguments[0]);
int j = (int) round(arguments[1]);
if (i < 0 || i >= xsize || j < 0 || j >= ysize)
throw OpenMMException("ReferenceDiscrete2DFunction: argument out of range");
i = max(0, min(xsize-1, i));
j = max(0, min(ysize-1, j));
return values[i+j*xsize];
}
......@@ -301,8 +300,9 @@ double ReferenceDiscrete3DFunction::evaluate(const double* arguments) const {
int i = (int) round(arguments[0]);
int j = (int) round(arguments[1]);
int k = (int) round(arguments[2]);
if (i < 0 || i >= xsize || j < 0 || j >= ysize || k < 0 || k >= zsize)
throw OpenMMException("ReferenceDiscrete3DFunction: argument out of range");
i = max(0, min(xsize-1, i));
j = max(0, min(ysize-1, j));
k = max(0, min(zsize-1, k));
return values[i+(j+k*ysize)*xsize];
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment