Commit 40947735 authored by peastman's avatar peastman
Browse files

Further optimizations to CPU platform

parent dc474876
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "AlignedArray.h" #include "AlignedArray.h"
#include "RealVec.h" #include "RealVec.h"
#include "windowsExportCpu.h" #include "windowsExportCpu.h"
#include "openmm/internal/gmx_atomic.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include <set> #include <set>
#include <utility> #include <utility>
...@@ -74,6 +75,7 @@ private: ...@@ -74,6 +75,7 @@ private:
int numAtoms; int numAtoms;
bool usePeriodic; bool usePeriodic;
float maxDistance; float maxDistance;
gmx_atomic_t atomicCounter;
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -599,7 +599,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -599,7 +599,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
// Determine whether we need to recompute the neighbor list. // Determine whether we need to recompute the neighbor list.
double padding = 0.15*nonbondedCutoff; double padding = 0.25*nonbondedCutoff;
bool needRecompute = false; bool needRecompute = false;
double closeCutoff2 = 0.25*padding*padding; double closeCutoff2 = 0.25*padding*padding;
double farCutoff2 = 0.5*padding*padding; double farCutoff2 = 0.5*padding*padding;
......
...@@ -59,22 +59,25 @@ public: ...@@ -59,22 +59,25 @@ public:
*/ */
class CpuNeighborList::Voxels { class CpuNeighborList::Voxels {
public: public:
Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* periodicBoxVectors, bool usePeriodic) : Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* boxVectors, bool usePeriodic) :
blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), periodicBoxVectors(periodicBoxVectors), usePeriodic(usePeriodic) { blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
periodicBoxSize[0] = (float) periodicBoxVectors[0][0]; for (int i = 0; i < 3; i++)
periodicBoxSize[1] = (float) periodicBoxVectors[1][1]; for (int j = 0; j < 3; j++)
periodicBoxSize[2] = (float) periodicBoxVectors[2][2]; periodicBoxVectors[i][j] = (float) boxVectors[i][j];
recipBoxSize[0] = (float) (1/periodicBoxVectors[0][0]); periodicBoxSize[0] = (float) boxVectors[0][0];
recipBoxSize[1] = (float) (1/periodicBoxVectors[1][1]); periodicBoxSize[1] = (float) boxVectors[1][1];
recipBoxSize[2] = (float) (1/periodicBoxVectors[2][2]); periodicBoxSize[2] = (float) boxVectors[2][2];
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 || recipBoxSize[0] = (float) (1/boxVectors[0][0]);
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 || recipBoxSize[1] = (float) (1/boxVectors[1][1]);
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0); recipBoxSize[2] = (float) (1/boxVectors[2][2]);
triclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
if (usePeriodic) { if (usePeriodic) {
ny = (int) floorf(periodicBoxVectors[1][1]/voxelSizeY+0.5f); ny = (int) floorf(boxVectors[1][1]/voxelSizeY+0.5f);
nz = (int) floorf(periodicBoxVectors[2][2]/voxelSizeZ+0.5f); nz = (int) floorf(boxVectors[2][2]/voxelSizeZ+0.5f);
voxelSizeY = periodicBoxVectors[1][1]/ny; voxelSizeY = boxVectors[1][1]/ny;
voxelSizeZ = periodicBoxVectors[2][2]/nz; voxelSizeZ = boxVectors[2][2]/nz;
} }
else { else {
ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f)); ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
...@@ -282,9 +285,10 @@ public: ...@@ -282,9 +285,10 @@ public:
// Loop over atoms and check to see if they are neighbors of this block. // Loop over atoms and check to see if they are neighbors of this block.
const vector<pair<float, int> >& voxelBins = bins[voxelIndex.y][voxelIndex.z];
for (int range = 0; range < numRanges; range++) { for (int range = 0; range < numRanges; range++) {
for (int item = rangeStart[range]; item < rangeEnd[range]; item++) { for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
const int sortedIndex = bins[voxelIndex.y][voxelIndex.z][item].second; const int sortedIndex = voxelBins[item].second;
// Avoid duplicate entries. // Avoid duplicate entries.
if (sortedIndex >= lastSortedIndex) if (sortedIndex >= lastSortedIndex)
...@@ -361,7 +365,7 @@ private: ...@@ -361,7 +365,7 @@ private:
int ny, nz; int ny, nz;
float periodicBoxSize[3], recipBoxSize[3]; float periodicBoxSize[3], recipBoxSize[3];
bool triclinic; bool triclinic;
const RealVec* periodicBoxVectors; float periodicBoxVectors[3][3];
const bool usePeriodic; const bool usePeriodic;
vector<vector<vector<pair<float, int> > > > bins; vector<vector<vector<pair<float, int> > > > bins;
}; };
...@@ -444,6 +448,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float ...@@ -444,6 +448,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
gmx_atomic_set(&atomicCounter, 0);
threads.resumeThreads(); threads.resumeThreads();
threads.waitForThreads(); threads.waitForThreads();
...@@ -500,7 +505,11 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI ...@@ -500,7 +505,11 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
vector<int> blockAtoms; vector<int> blockAtoms;
vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize); vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
vector<VoxelIndex> atomVoxelIndex; vector<VoxelIndex> atomVoxelIndex;
for (int i = threadIndex; i < numBlocks; i += numThreads) { while (true) {
int i = gmx_atomic_fetch_add(&atomicCounter, 1);
if (i >= numBlocks)
break;
// Find the atoms in this block and compute their bounding box. // Find the atoms in this block and compute their bounding box.
int firstIndex = blockSize*i; int firstIndex = blockSize*i;
...@@ -532,14 +541,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI ...@@ -532,14 +541,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
// Record the exclusions for this block. // Record the exclusions for this block.
map<int, char> atomFlags;
for (int j = 0; j < atomsInBlock; j++) { for (int j = 0; j < atomsInBlock; j++) {
const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]]; const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
char mask = 1<<j; char mask = 1<<j;
for (int k = 0; k < (int) blockNeighbors[i].size(); k++) { for (set<int>::const_iterator iter = atomExclusions.begin(); iter != atomExclusions.end(); ++iter) {
int atomIndex = blockNeighbors[i][k]; map<int, char>::iterator thisAtomFlags = atomFlags.find(*iter);
if (atomExclusions.find(atomIndex) != atomExclusions.end()) if (thisAtomFlags == atomFlags.end())
blockExclusions[i][k] |= mask; atomFlags[*iter] = mask;
else
thisAtomFlags->second |= mask;
}
} }
int numNeighbors = blockNeighbors[i].size();
for (int k = 0; k < numNeighbors; k++) {
int atomIndex = blockNeighbors[i][k];
map<int, char>::iterator thisAtomFlags = atomFlags.find(atomIndex);
if (thisAtomFlags != atomFlags.end())
blockExclusions[i][k] |= thisAtomFlags->second;
} }
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment