Commit dbc94d89 authored by Daniel Towner
Browse files

CPU: Widen mask for exclusion bits (#2676)

The exclusion mask was 8 bits wide, but future CPU support will require
more bits than this (e.g., AVX-512 will need 16 bits).
parent 6784a737
...@@ -53,7 +53,15 @@ public: ...@@ -53,7 +53,15 @@ public:
int getBlockSize() const; int getBlockSize() const;
const std::vector<int>& getSortedAtoms() const; const std::vector<int>& getSortedAtoms() const;
const std::vector<int>& getBlockNeighbors(int blockIndex) const; const std::vector<int>& getBlockNeighbors(int blockIndex) const;
const std::vector<char>& getBlockExclusions(int blockIndex) const;
/**
* Bitset for a single block, marking which indexes should be excluded. This data type needs to be big
* enough to store all the bits for any possible block size.
*/
using BlockExclusionMask = int16_t;
const std::vector<BlockExclusionMask>& getBlockExclusions(int blockIndex) const;
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
*/ */
...@@ -64,7 +72,7 @@ private: ...@@ -64,7 +72,7 @@ private:
std::vector<int> sortedAtoms; std::vector<int> sortedAtoms;
std::vector<float> sortedPositions; std::vector<float> sortedPositions;
std::vector<std::vector<int> > blockNeighbors; std::vector<std::vector<int> > blockNeighbors;
std::vector<std::vector<char> > blockExclusions; std::vector<std::vector<BlockExclusionMask> > blockExclusions;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
float minx, maxx, miny, maxy, minz, maxz; float minx, maxx, miny, maxy, minz, maxz;
std::vector<std::pair<int, int> > atomBins; std::vector<std::pair<int, int> > atomBins;
......
...@@ -363,7 +363,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i ...@@ -363,7 +363,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
const int blockSize = neighborList->getBlockSize(); const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& blockExclusions = neighborList->getBlockExclusions(blockIndex); const auto& blockExclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i]; int first = neighbors[i];
for (int k = 0; k < blockSize; k++) { for (int k = 0; k < blockSize; k++) {
...@@ -458,7 +458,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da ...@@ -458,7 +458,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
const int blockSize = neighborList->getBlockSize(); const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& blockExclusions = neighborList->getBlockExclusions(blockIndex); const auto& blockExclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i]; int first = neighbors[i];
for (int k = 0; k < blockSize; k++) { for (int k = 0; k < blockSize; k++) {
...@@ -545,7 +545,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, ...@@ -545,7 +545,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
const int blockSize = neighborList->getBlockSize(); const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& blockExclusions = neighborList->getBlockExclusions(blockIndex); const auto& blockExclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i]; int first = neighbors[i];
for (int k = 0; k < blockSize; k++) { for (int k = 0; k < blockSize; k++) {
......
...@@ -110,7 +110,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, vector< ...@@ -110,7 +110,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, vector<
neighborList->computeNeighborList(numParticles, posq, exclusions, periodicBoxVectors, usePeriodic, cutoffDistance, threads); neighborList->computeNeighborList(numParticles, posq, exclusions, periodicBoxVectors, usePeriodic, cutoffDistance, threads);
for (int blockIndex = 0; blockIndex < neighborList->getNumBlocks(); blockIndex++) { for (int blockIndex = 0; blockIndex < neighborList->getNumBlocks(); blockIndex++) {
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex); const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
int numNeighbors = neighbors.size(); int numNeighbors = neighbors.size();
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
int p1 = neighborList->getSortedAtoms()[4*blockIndex+i]; int p1 = neighborList->getSortedAtoms()[4*blockIndex+i];
......
...@@ -195,7 +195,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -195,7 +195,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
const int blockSize = neighborList->getBlockSize(); const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex); const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i]; int first = neighbors[i];
for (int j = 0; j < (int) paramNames.size(); j++) for (int j = 0; j < (int) paramNames.size(); j++)
......
...@@ -183,7 +183,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex, ...@@ -183,7 +183,7 @@ void CpuGayBerneForce::threadComputeForce(ThreadPool& threads, int threadIndex,
const int blockSize = neighborList->getBlockSize(); const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex]; const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex); const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) { for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i]; int first = neighbors[i];
if (particles[first].sqrtEpsilon == 0.0f) if (particles[first].sqrtEpsilon == 0.0f)
......
...@@ -164,7 +164,7 @@ public: ...@@ -164,7 +164,7 @@ public:
return VoxelIndex(y, z); return VoxelIndex(y, z);
} }
void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const { void getNeighbors(vector<int>& neighbors, int blockIndex, const fvec4& blockCenter, const fvec4& blockWidth, const vector<int>& sortedAtoms, vector<CpuNeighborList::BlockExclusionMask>& exclusions, float maxDistance, const vector<int>& blockAtoms, const vector<float>& blockAtomX, const vector<float>& blockAtomY, const vector<float>& blockAtomZ, const vector<float>& sortedPositions, const vector<VoxelIndex>& atomVoxelIndex) const {
neighbors.resize(0); neighbors.resize(0);
exclusions.resize(0); exclusions.resize(0);
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0); fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
...@@ -484,10 +484,10 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float ...@@ -484,10 +484,10 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
int numPadding = numBlocks*blockSize-numAtoms; int numPadding = numBlocks*blockSize-numAtoms;
if (numPadding > 0) { if (numPadding > 0) {
char mask = ((0xFFFF-(1<<blockSize)+1) >> numPadding); const BlockExclusionMask mask = (~0) << (blockSize - numPadding);
for (int i = 0; i < numPadding; i++) for (int i = 0; i < numPadding; i++)
sortedAtoms.push_back(0); sortedAtoms.push_back(0);
vector<char>& exc = blockExclusions[blockExclusions.size()-1]; auto& exc = blockExclusions[blockExclusions.size()-1];
for (int i = 0; i < (int) exc.size(); i++) for (int i = 0; i < (int) exc.size(); i++)
exc[i] |= mask; exc[i] |= mask;
} }
...@@ -509,7 +509,7 @@ const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const ...@@ -509,7 +509,7 @@ const std::vector<int>& CpuNeighborList::getBlockNeighbors(int blockIndex) const
return blockNeighbors[blockIndex]; return blockNeighbors[blockIndex];
} }
const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) const { const std::vector<CpuNeighborList::BlockExclusionMask>& CpuNeighborList::getBlockExclusions(int blockIndex) const {
return blockExclusions[blockIndex]; return blockExclusions[blockIndex];
} }
...@@ -573,12 +573,12 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI ...@@ -573,12 +573,12 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
// Record the exclusions for this block. // Record the exclusions for this block.
map<int, char> atomFlags; map<int, BlockExclusionMask> atomFlags;
for (int j = 0; j < atomsInBlock; j++) { for (int j = 0; j < atomsInBlock; j++) {
const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]]; const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
char mask = 1<<j; const BlockExclusionMask mask = 1<<j;
for (int exclusion : atomExclusions) { for (int exclusion : atomExclusions) {
map<int, char>::iterator thisAtomFlags = atomFlags.find(exclusion); const auto thisAtomFlags = atomFlags.find(exclusion);
if (thisAtomFlags == atomFlags.end()) if (thisAtomFlags == atomFlags.end())
atomFlags[exclusion] = mask; atomFlags[exclusion] = mask;
else else
...@@ -588,7 +588,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI ...@@ -588,7 +588,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int numNeighbors = blockNeighbors[i].size(); int numNeighbors = blockNeighbors[i].size();
for (int k = 0; k < numNeighbors; k++) { for (int k = 0; k < numNeighbors; k++) {
int atomIndex = blockNeighbors[i][k]; int atomIndex = blockNeighbors[i][k];
map<int, char>::iterator thisAtomFlags = atomFlags.find(atomIndex); auto thisAtomFlags = atomFlags.find(atomIndex);
if (thisAtomFlags != atomFlags.end()) if (thisAtomFlags != atomFlags.end())
blockExclusions[i][k] |= thisAtomFlags->second; blockExclusions[i][k] |= thisAtomFlags->second;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment