Unverified Commit 655518c3 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Skip neighbor list for very small systems (#4070)

* Skip neighbor list for very small systems

* Fixed typos

* Don't skip box size check when not using neighbor list

* Made test larger to ensure it uses neighbor list
parent d6cca390
......@@ -69,8 +69,10 @@ public:
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
* @param useNeighborList specifies whether a neighbor list should be used to optimize this interaction. This should
* be viewed as only a suggestion. Even when it is false, a neighbor list may be used anyway.
*/
virtual void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup) = 0;
virtual void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool useNeighborList=true) = 0;
/**
* Add a per-atom parameter that the default interaction kernel may depend on.
*/
......
......@@ -1992,7 +1992,7 @@ void CommonCalcCustomNonbondedForceKernel::initialize(const System& system, cons
if (force.getNumInteractionGroups() > 0)
initInteractionGroups(force, source, tableTypes);
else {
cc.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cc.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), numParticles > 2000);
for (int i = 0; i < paramBuffers.size(); i++)
cc.getNonbondedUtilities().addParameter(ComputeParameterInfo(paramBuffers[i].getArray(), prefix+"params"+cc.intToString(i+1),
paramBuffers[i].getComponentType(), paramBuffers[i].getNumComponents()));
......
......@@ -81,8 +81,10 @@ public:
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
* @param useNeighborList specifies whether a neighbor list should be used to optimize this interaction. This should
* be viewed as only a suggestion. Even when it is false, a neighbor list may be used anyway.
*/
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup);
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool useNeighborList=true);
/**
* Add a nonbonded interaction to be evaluated by the default interaction kernel.
*
......@@ -93,9 +95,11 @@ public:
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
* @param useNeighborList specifies whether a neighbor list should be used to optimize this interaction. This should
* be viewed as only a suggestion. Even when it is false, a neighbor list may be used anyway.
* @param supportsPairList specifies whether this interaction can work with a neighbor list that uses a separate pair list
*/
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool supportsPairList);
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool useNeighborList, bool supportsPairList);
/**
* Add a per-atom parameter that the default interaction kernel may depend on.
*/
......@@ -347,7 +351,7 @@ private:
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
bool useCutoff, usePeriodic, anyExclusions, usePadding, useNeighborList, forceRebuildNeighborList, canUsePairList;
int startTileIndex, startBlockIndex, numBlocks, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
unsigned int maxTiles, maxSinglePairs, tilesAfterReorder;
long long numTiles;
......
......@@ -1035,7 +1035,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
}
source = cu.replaceStrings(source, replacements);
if (force.getIncludeDirectSpace())
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), numParticles > 3000, true);
// Initialize the exceptions.
......
......@@ -63,7 +63,7 @@ private:
bool useDouble;
};
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), useCutoff(false), usePeriodic(false), useNeighborList(false), anyExclusions(false), usePadding(true),
blockSorter(NULL), pinnedCountBuffer(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0), canUsePairList(true), tilesAfterReorder(0) {
// Decide how many thread blocks to use.
......@@ -85,11 +85,11 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
cuEventDestroy(downloadCountEvent);
}
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
addInteraction(usesCutoff, usesPeriodic, usesExclusions, cutoffDistance, exclusionList, kernel, forceGroup, false);
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup, bool useNeighborList) {
addInteraction(usesCutoff, usesPeriodic, usesExclusions, cutoffDistance, exclusionList, kernel, forceGroup, useNeighborList, false);
}
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup, bool supportsPairList) {
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup, bool useNeighborList, bool supportsPairList) {
if (groupCutoff.size() > 0) {
if (usesCutoff != useCutoff)
throw OpenMMException("All Forces must agree on whether to use a cutoff");
......@@ -102,6 +102,7 @@ void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic,
requestExclusions(exclusionList);
useCutoff = usesCutoff;
usePeriodic = usesPeriodic;
this->useNeighborList |= (useNeighborList && useCutoff);
groupCutoff[forceGroup] = cutoffDistance;
groupFlags |= 1<<forceGroup;
canUsePairList &= supportsPairList;
......@@ -378,17 +379,17 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
return;
if (groupKernels.find(forceGroups) == groupKernels.end())
createKernelsForGroups(forceGroups);
if (!useCutoff)
return;
if (numTiles == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (usePeriodic) {
if (useCutoff && usePeriodic) {
double4 box = context.getPeriodicBoxSize();
double minAllowedSize = 1.999999*kernels.cutoffDistance;
if (box.x < minAllowedSize || box.y < minAllowedSize || box.z < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
}
if (!useNeighborList)
return;
if (numTiles == 0)
return;
// Compute the neighbor list.
......@@ -414,7 +415,7 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
kernel = createInteractionKernel(kernels.source, parameters, arguments, true, true, forceGroups, includeForces, includeEnergy);
context.executeKernel(kernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
if (useCutoff && numTiles > 0) {
if (useNeighborList && numTiles > 0) {
cuEventSynchronize(downloadCountEvent);
updateNeighborListSize();
}
......@@ -654,6 +655,8 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines["USE_EXCLUSIONS"] = "1";
if (isSymmetric)
defines["USE_SYMMETRIC"] = "1";
if (useNeighborList)
defines["USE_NEIGHBOR_LIST"] = "1";
defines["ENABLE_SHUFFLE"] = "1";
if (includeForces)
defines["INCLUDE_FORCES"] = "1";
......
......@@ -267,7 +267,7 @@ extern "C" __global__ void computeNonbonded(
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
#ifdef USE_CUTOFF
#ifdef USE_NEIGHBOR_LIST
const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
......@@ -293,7 +293,7 @@ extern "C" __global__ void computeNonbonded(
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_NEIGHBOR_LIST
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
......@@ -328,7 +328,7 @@ extern "C" __global__ void computeNonbonded(
// Load atom data for this tile.
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
#ifdef USE_CUTOFF
#ifdef USE_NEIGHBOR_LIST
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
#else
unsigned int j = y*TILE_SIZE + tgx;
......@@ -459,7 +459,7 @@ extern "C" __global__ void computeNonbonded(
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>(realToFixedPoint(force.x)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.y)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>(realToFixedPoint(force.z)));
#ifdef USE_CUTOFF
#ifdef USE_NEIGHBOR_LIST
unsigned int atom2 = atomIndices[threadIdx.x];
#else
unsigned int atom2 = y*TILE_SIZE + tgx;
......@@ -476,7 +476,7 @@ extern "C" __global__ void computeNonbonded(
// Third loop: single pairs that aren't part of a tile.
#if USE_CUTOFF
#if USE_NEIGHBOR_LIST
const unsigned int numPairs = interactionCount[1];
if (numPairs > maxSinglePairs)
return; // There wasn't enough memory for the neighbor list.
......
......@@ -80,8 +80,10 @@ public:
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated
* @param useNeighborList specifies whether a neighbor list should be used to optimize this interaction. This should
* be viewed as only a suggestion. Even when it is false, a neighbor list may be used anyway.
*/
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup);
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool useNeighborList=true);
/**
* Add a per-atom parameter that the default interaction kernel may depend on.
*/
......@@ -330,7 +332,7 @@ private:
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, deviceIsCpu, anyExclusions, usePadding, forceRebuildNeighborList;
bool useCutoff, usePeriodic, deviceIsCpu, anyExclusions, usePadding, useNeighborList, forceRebuildNeighborList;
int startTileIndex, startBlockIndex, numBlocks, maxExclusions, numForceThreadBlocks;
int forceThreadBlockSize, interactingBlocksThreadBlockSize, groupFlags;
unsigned int tilesAfterReorder;
......
......@@ -974,7 +974,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
}
source = cl.replaceStrings(source, replacements);
if (force.getIncludeDirectSpace())
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), numParticles > 3000);
// Initialize the exceptions.
......
......@@ -55,7 +55,7 @@ private:
bool useDouble;
};
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), useCutoff(false), usePeriodic(false), useNeighborList(false), anyExclusions(false), usePadding(true),
blockSorter(NULL), pinnedCountBuffer(NULL), pinnedCountMemory(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0), tilesAfterReorder(0) {
// Decide how many thread blocks and force buffers to use.
......@@ -90,7 +90,7 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete pinnedCountBuffer;
}
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup, bool useNeighborList) {
if (groupCutoff.size() > 0) {
if (usesCutoff != useCutoff)
throw OpenMMException("All Forces must agree on whether to use a cutoff");
......@@ -103,6 +103,7 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic
requestExclusions(exclusionList);
useCutoff = usesCutoff;
usePeriodic = usesPeriodic;
this->useNeighborList |= ((useNeighborList || deviceIsCpu) && useCutoff);
groupCutoff[forceGroup] = cutoffDistance;
groupFlags |= 1<<forceGroup;
if (kernel.size() > 0) {
......@@ -335,17 +336,17 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
return;
if (groupKernels.find(forceGroups) == groupKernels.end())
createKernelsForGroups(forceGroups);
if (!useCutoff)
return;
if (numTiles == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (usePeriodic) {
if (useCutoff && usePeriodic) {
mm_float4 box = context.getPeriodicBoxSize();
double minAllowedSize = 1.999999*kernels.cutoffDistance;
if (box.x < minAllowedSize || box.y < minAllowedSize || box.z < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
}
if (!useNeighborList)
return;
if (numTiles == 0)
return;
// Compute the neighbor list.
......@@ -381,7 +382,7 @@ void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool include
setPeriodicBoxArgs(context, kernel, 9);
context.executeKernel(kernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
if (useCutoff && numTiles > 0) {
if (useNeighborList && numTiles > 0) {
#if __APPLE__ && defined(__aarch64__)
// Ensure cached up work executes while you're waiting.
if (kernels.hasForces)
......@@ -687,6 +688,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["USE_EXCLUSIONS"] = "1";
if (isSymmetric)
defines["USE_SYMMETRIC"] = "1";
if (useNeighborList)
defines["USE_NEIGHBOR_LIST"] = "1";
if (useCutoff && context.getSIMDWidth() < 32)
defines["PRUNE_BY_CUTOFF"] = "1";
if (includeForces)
......
......@@ -185,7 +185,7 @@ __kernel void computeNonbonded(
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
#ifdef USE_CUTOFF
#ifdef USE_NEIGHBOR_LIST
unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
......@@ -210,7 +210,7 @@ __kernel void computeNonbonded(
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_NEIGHBOR_LIST
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
......@@ -250,7 +250,7 @@ __kernel void computeNonbonded(
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
#ifdef USE_CUTOFF
#ifdef USE_NEIGHBOR_LIST
unsigned int j = interactingAtoms[pos*TILE_SIZE+tgx];
#else
unsigned int j = y*TILE_SIZE + tgx;
......@@ -389,7 +389,7 @@ __kernel void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
#ifdef USE_CUTOFF
#ifdef USE_NEIGHBOR_LIST
unsigned int atom2 = atomIndices[get_local_id(0)];
#else
unsigned int atom2 = y*TILE_SIZE + tgx;
......
......@@ -455,10 +455,10 @@ void testTriclinic() {
}
void testLargeSystem() {
const int numMolecules = 600;
const int numMolecules = 1600;
const int numParticles = numMolecules*2;
const double cutoff = 2.0;
const double boxSize = 20.0;
const double boxSize = 40.0;
const double tol = 2e-3;
ReferencePlatform reference;
System system;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment