Commit 40947735 authored by peastman's avatar peastman
Browse files

Further optimizations to CPU platform

parent dc474876
......@@ -35,6 +35,7 @@
#include "AlignedArray.h"
#include "RealVec.h"
#include "windowsExportCpu.h"
#include "openmm/internal/gmx_atomic.h"
#include "openmm/internal/ThreadPool.h"
#include <set>
#include <utility>
......@@ -74,6 +75,7 @@ private:
int numAtoms;
bool usePeriodic;
float maxDistance;
gmx_atomic_t atomicCounter;
};
} // namespace OpenMM
......
......@@ -599,7 +599,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
if (nonbondedMethod != NoCutoff) {
// Determine whether we need to recompute the neighbor list.
double padding = 0.15*nonbondedCutoff;
double padding = 0.25*nonbondedCutoff;
bool needRecompute = false;
double closeCutoff2 = 0.25*padding*padding;
double farCutoff2 = 0.5*padding*padding;
......
......@@ -59,22 +59,25 @@ public:
*/
class CpuNeighborList::Voxels {
public:
Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* periodicBoxVectors, bool usePeriodic) :
blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), periodicBoxVectors(periodicBoxVectors), usePeriodic(usePeriodic) {
periodicBoxSize[0] = (float) periodicBoxVectors[0][0];
periodicBoxSize[1] = (float) periodicBoxVectors[1][1];
periodicBoxSize[2] = (float) periodicBoxVectors[2][2];
recipBoxSize[0] = (float) (1/periodicBoxVectors[0][0]);
recipBoxSize[1] = (float) (1/periodicBoxVectors[1][1]);
recipBoxSize[2] = (float) (1/periodicBoxVectors[2][2]);
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* boxVectors, bool usePeriodic) :
blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
periodicBoxVectors[i][j] = (float) boxVectors[i][j];
periodicBoxSize[0] = (float) boxVectors[0][0];
periodicBoxSize[1] = (float) boxVectors[1][1];
periodicBoxSize[2] = (float) boxVectors[2][2];
recipBoxSize[0] = (float) (1/boxVectors[0][0]);
recipBoxSize[1] = (float) (1/boxVectors[1][1]);
recipBoxSize[2] = (float) (1/boxVectors[2][2]);
triclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
if (usePeriodic) {
ny = (int) floorf(periodicBoxVectors[1][1]/voxelSizeY+0.5f);
nz = (int) floorf(periodicBoxVectors[2][2]/voxelSizeZ+0.5f);
voxelSizeY = periodicBoxVectors[1][1]/ny;
voxelSizeZ = periodicBoxVectors[2][2]/nz;
ny = (int) floorf(boxVectors[1][1]/voxelSizeY+0.5f);
nz = (int) floorf(boxVectors[2][2]/voxelSizeZ+0.5f);
voxelSizeY = boxVectors[1][1]/ny;
voxelSizeZ = boxVectors[2][2]/nz;
}
else {
ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
......@@ -282,9 +285,10 @@ public:
// Loop over atoms and check to see if they are neighbors of this block.
const vector<pair<float, int> >& voxelBins = bins[voxelIndex.y][voxelIndex.z];
for (int range = 0; range < numRanges; range++) {
for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
const int sortedIndex = bins[voxelIndex.y][voxelIndex.z][item].second;
const int sortedIndex = voxelBins[item].second;
// Avoid duplicate entries.
if (sortedIndex >= lastSortedIndex)
......@@ -361,7 +365,7 @@ private:
int ny, nz;
float periodicBoxSize[3], recipBoxSize[3];
bool triclinic;
const RealVec* periodicBoxVectors;
float periodicBoxVectors[3][3];
const bool usePeriodic;
vector<vector<vector<pair<float, int> > > > bins;
};
......@@ -444,6 +448,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Signal the threads to start running and wait for them to finish.
gmx_atomic_set(&atomicCounter, 0);
threads.resumeThreads();
threads.waitForThreads();
......@@ -500,7 +505,11 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
vector<int> blockAtoms;
vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
vector<VoxelIndex> atomVoxelIndex;
for (int i = threadIndex; i < numBlocks; i += numThreads) {
while (true) {
int i = gmx_atomic_fetch_add(&atomicCounter, 1);
if (i >= numBlocks)
break;
// Find the atoms in this block and compute their bounding box.
int firstIndex = blockSize*i;
......@@ -532,14 +541,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
// Record the exclusions for this block.
map<int, char> atomFlags;
for (int j = 0; j < atomsInBlock; j++) {
const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
char mask = 1<<j;
for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
int atomIndex = blockNeighbors[i][k];
if (atomExclusions.find(atomIndex) != atomExclusions.end())
blockExclusions[i][k] |= mask;
for (set<int>::const_iterator iter = atomExclusions.begin(); iter != atomExclusions.end(); ++iter) {
map<int, char>::iterator thisAtomFlags = atomFlags.find(*iter);
if (thisAtomFlags == atomFlags.end())
atomFlags[*iter] = mask;
else
thisAtomFlags->second |= mask;
}
}
int numNeighbors = blockNeighbors[i].size();
for (int k = 0; k < numNeighbors; k++) {
int atomIndex = blockNeighbors[i][k];
map<int, char>::iterator thisAtomFlags = atomFlags.find(atomIndex);
if (thisAtomFlags != atomFlags.end())
blockExclusions[i][k] |= thisAtomFlags->second;
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment