Commit dbc94d89 authored by Daniel Towner
Browse files

CPU: Widen mask for exclusion bits (#2676)

The exclusion mask was 8 bits wide, but future CPU support will require
more bits than this (e.g., AVX-512 will need 16 bits).
parent 6784a737
......@@ -53,7 +53,15 @@ public:
int getBlockSize() const;
const std::vector<int>& getSortedAtoms() const;
const std::vector<int>& getBlockNeighbors(int blockIndex) const;
const std::vector<char>& getBlockExclusions(int blockIndex) const;
/**
* Bitset for a single block, marking which indexes should be excluded. This data type needs to be big
* enough to store all the bits for any possible block size.
*/
using BlockExclusionMask = int16_t;
const std::vector<BlockExclusionMask>& getBlockExclusions(int blockIndex) const;
/**
* This routine contains the code executed by each thread.
*/
......@@ -64,7 +72,7 @@ private:
std::vector<int> sortedAtoms;
std::vector<float> sortedPositions;
std::vector<std::vector<int> > blockNeighbors;
std::vector<std::vector<char> > blockExclusions;
std::vector<std::vector<BlockExclusionMask> > blockExclusions;
// The following variables are used to make information accessible to the individual threads.
float minx, maxx, miny, maxy, minz, maxz;
std::vector<std::pair<int, int> > atomBins;
......
......@@ -363,7 +363,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& blockExclusions = neighborList->getBlockExclusions(blockIndex);
const auto& blockExclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
for (int k = 0; k < blockSize; k++) {
......@@ -458,7 +458,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& blockExclusions = neighborList->getBlockExclusions(blockIndex);
const auto& blockExclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
for (int k = 0; k < blockSize; k++) {
......@@ -545,7 +545,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& blockExclusions = neighborList->getBlockExclusions(blockIndex);
const auto& blockExclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
for (int k = 0; k < blockSize; k++) {
......
......@@ -110,7 +110,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, vector<
neighborList->computeNeighborList(numParticles, posq, exclusions, periodicBoxVectors, usePeriodic, cutoffDistance, threads);
for (int blockIndex = 0; blockIndex < neighborList->getNumBlocks(); blockIndex++) {
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
int numNeighbors = neighbors.size();
for (int i = 0; i < 4; i++) {
int p1 = neighborList->getSortedAtoms()[4*blockIndex+i];
......
......@@ -195,7 +195,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
for (int j = 0; j < (int) paramNames.size(); j++)
......
......@@ -183,7 +183,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
if (particles[first].sqrtEpsilon == 0.0f)
......
......@@ -164,7 +164,7 @@ public:
return VoxelIndex(y, z);
}
void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const {
void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<CpuNeighborList::BlockExclusionMask>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const {
neighbors.resize(0);
exclusions.resize(0);
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
......@@ -484,10 +484,10 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
int numPadding = numBlocks*blockSize-numAtoms;
if (numPadding > 0) {
char mask = ((0xFFFF-(1<<blockSize)+1) >> numPadding);
const BlockExclusionMask mask = (~0) << (blockSize - numPadding);
for (int i = 0; i < numPadding; i++)
sortedAtoms.push_back(0);
vector<char>& exc = blockExclusions[blockExclusions.size()-1];
auto& exc = blockExclusions[blockExclusions.size()-1];
for (int i = 0; i < (int) exc.size(); i++)
exc[i] |= mask;
}
......@@ -509,7 +509,7 @@ const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const
return blockNeighbors[blockIndex];
}
const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
const std::vector<CpuNeighborList::BlockExclusionMask>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
return blockExclusions[blockIndex];
}
......@@ -573,12 +573,12 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
// Record the exclusions for this block.
map<int, char> atomFlags;
map<int, BlockExclusionMask> atomFlags;
for (int j = 0; j < atomsInBlock; j++) {
const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
char mask = 1<<j;
const BlockExclusionMask mask = 1<<j;
for (int exclusion : atomExclusions) {
map<int, char>::iterator thisAtomFlags = atomFlags.find(exclusion);
const auto thisAtomFlags = atomFlags.find(exclusion);
if (thisAtomFlags == atomFlags.end())
atomFlags[exclusion] = mask;
else
......@@ -588,7 +588,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int numNeighbors = blockNeighbors[i].size();
for (int k = 0; k < numNeighbors; k++) {
int atomIndex = blockNeighbors[i][k];
map<int, char>::iterator thisAtomFlags = atomFlags.find(atomIndex);
auto thisAtomFlags = atomFlags.find(atomIndex);
if (thisAtomFlags != atomFlags.end())
blockExclusions[i][k] |= thisAtomFlags->second;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment