"platforms/vscode:/vscode.git/clone" did not exist on "c274f08df51217d252cb778715bec2a071f6b78d"
Commit dc474876 authored by peastman's avatar peastman
Browse files

Merge pull request #1152 from peastman/cpuopt

Further optimizations to CPU platform
parents ec799972 15e2ba85
......@@ -28,7 +28,7 @@
#include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h"
#include "CpuCustomGBForce.h"
#include "gmx_atomic.h"
#include "openmm/internal/gmx_atomic.h"
using namespace OpenMM;
using namespace std;
......
......@@ -32,7 +32,7 @@
#include "ReferenceTabulatedFunction.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
#include "lepton/CustomFunction.h"
#include "gmx_atomic.h"
#include "openmm/internal/gmx_atomic.h"
using namespace OpenMM;
using namespace std;
......
......@@ -28,7 +28,7 @@
#include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h"
#include "CpuCustomNonbondedForce.h"
#include "gmx_atomic.h"
#include "openmm/internal/gmx_atomic.h"
using namespace OpenMM;
using namespace std;
......
......@@ -25,7 +25,7 @@
#include "CpuGBSAOBCForce.h"
#include "SimTKOpenMMRealType.h"
#include "openmm/internal/vectorize.h"
#include "gmx_atomic.h"
#include "openmm/internal/gmx_atomic.h"
#include <algorithm>
#include <cmath>
#include <cstdlib>
......
......@@ -28,7 +28,7 @@
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "gmx_atomic.h"
#include "openmm/internal/gmx_atomic.h"
#include <algorithm>
// In case we're using some primitive version of Visual Studio this will
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -30,6 +30,7 @@
* -------------------------------------------------------------------------- */
#include "CpuSETTLE.h"
#include "openmm/internal/gmx_atomic.h"
using namespace OpenMM;
using namespace std;
......@@ -39,10 +40,14 @@ public:
ApplyToPositionsTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses,
RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), atomCoordinatesP(atomCoordinatesP),
inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
gmx_atomic_set(&atomicCounter, 0);
}
void execute(ThreadPool& threads, int threadIndex) {
if (threadIndex < threadSettle.size()) {
threadSettle[threadIndex]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
}
}
vector<OpenMM::RealVec>& atomCoordinates;
......@@ -50,6 +55,7 @@ public:
vector<RealOpenMM>& inverseMasses;
RealOpenMM tolerance;
vector<ReferenceSETTLEAlgorithm*>& threadSettle;
gmx_atomic_t atomicCounter;
};
class CpuSETTLE::ApplyToVelocitiesTask : public ThreadPool::Task {
......@@ -57,10 +63,14 @@ public:
ApplyToVelocitiesTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses,
RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), velocities(velocities),
inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
gmx_atomic_set(&atomicCounter, 0);
}
void execute(ThreadPool& threads, int threadIndex) {
if (threadIndex < threadSettle.size()) {
threadSettle[threadIndex]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
}
}
vector<OpenMM::RealVec>& atomCoordinates;
......@@ -68,17 +78,18 @@ public:
vector<RealOpenMM>& inverseMasses;
RealOpenMM tolerance;
vector<ReferenceSETTLEAlgorithm*>& threadSettle;
gmx_atomic_t atomicCounter;
};
CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) {
int numThreads = threads.getNumThreads();
int numBlocks = 10*threads.getNumThreads();
int numClusters = settle.getNumClusters();
vector<RealOpenMM> mass(system.getNumParticles());
for (int i = 0; i < system.getNumParticles(); i++)
mass[i] = system.getParticleMass(i);
for (int i = 0; i < numThreads; i++) {
int start = i*numClusters/numThreads;
int end = (i+1)*numClusters/numThreads;
for (int i = 0; i < numBlocks; i++) {
int start = i*numClusters/numBlocks;
int end = (i+1)*numClusters/numBlocks;
if (start != end) {
int numThreadClusters = end-start;
vector<int> atom1(numThreadClusters), atom2(numThreadClusters), atom3(numThreadClusters);
......
......@@ -49,7 +49,7 @@ static const int PME_ORDER = 5;
bool CpuCalcPmeReciprocalForceKernel::hasInitializedThreads = false;
int CpuCalcPmeReciprocalForceKernel::numThreads = 0;
static void spreadCharge(int start, int end, float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors) {
static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, gmx_atomic_t& atomicCounter) {
float temp[4];
fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0);
......@@ -64,7 +64,11 @@ static void spreadCharge(int start, int end, float* posq, float* grid, int gridx
const float epsilonFactor = sqrt(ONE_4PI_EPS0);
memset(grid, 0, sizeof(float)*gridx*gridy*gridz);
for (int i = start; i < end; i++) {
while (true) {
int i = gmx_atomic_fetch_add(&atomicCounter, 1);
if (i >= numParticles)
break;
// Find the position relative to the nearest grid point.
fvec4 pos(&posq[4*i]);
......@@ -225,25 +229,15 @@ static double reciprocalEnergy(int start, int end, fftwf_complex* grid, int grid
return 0.5*energy;
}
static void reciprocalConvolution(int start, int end, fftwf_complex* grid, int gridx, int gridy, int gridz, vector<float>& recipEterm) {
const unsigned int zsize = gridz/2+1;
const unsigned int yzsize = gridy*zsize;
int firstz = (start == 0 ? 1 : 0);
for (int kx = start; kx < end; kx++) {
for (int ky = 0; ky < gridy; ky++) {
for (int kz = firstz; kz < zsize; kz++) {
int index = kx*yzsize + ky*zsize + kz;
static void reciprocalConvolution(int start, int end, fftwf_complex* grid, vector<float>& recipEterm) {
for (int index = start; index < end; index++) {
float eterm = recipEterm[index];
grid[index][0] *= eterm;
grid[index][1] *= eterm;
}
firstz = 0;
}
}
}
static void interpolateForces(int start, int end, float* posq, float* force, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors) {
static void interpolateForces(float* posq, float* force, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, gmx_atomic_t& atomicCounter) {
fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0);
fvec4 recipBoxVec0((float) recipBoxVectors[0][0], (float) recipBoxVectors[0][1], (float) recipBoxVectors[0][2], 0);
......@@ -254,7 +248,11 @@ static void interpolateForces(int start, int end, float* posq, float* force, flo
fvec4 one(1);
fvec4 scale(1.0f/(PME_ORDER-1));
const float epsilonFactor = sqrt(ONE_4PI_EPS0);
for (int i = start; i < end; i++) {
while (true) {
int i = gmx_atomic_fetch_add(&atomicCounter, 1);
if (i >= numParticles)
break;
// Find the position relative to the nearest grid point.
fvec4 pos(&posq[4*i]);
......@@ -485,6 +483,7 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
break;
posq = io->getPosq();
ComputeTask task(*this);
gmx_atomic_set(&atomicCounter, 0);
threads.execute(task); // Signal threads to perform charge spreading.
threads.waitForThreads();
threads.resumeThreads(); // Signal threads to sum the charge grids.
......@@ -503,6 +502,7 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
threads.resumeThreads(); // Signal threads to perform reciprocal convolution.
threads.waitForThreads();
fftwf_execute_dft_c2r(backwardFFT, complexGrid, realGrid);
gmx_atomic_set(&atomicCounter, 0);
threads.resumeThreads(); // Signal threads to interpolate forces.
threads.waitForThreads();
isFinished = true;
......@@ -515,14 +515,15 @@ void CpuCalcPmeReciprocalForceKernel::runMainThread() {
}
void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int index) {
int particleStart = (index*numParticles)/numThreads;
int particleEnd = ((index+1)*numParticles)/numThreads;
int gridxStart = (index*gridx)/numThreads;
int gridxEnd = ((index+1)*gridx)/numThreads;
int gridSize = (gridx*gridy*gridz+3)/4;
int gridStart = 4*((index*gridSize)/numThreads);
int gridEnd = 4*(((index+1)*gridSize)/numThreads);
spreadCharge(particleStart, particleEnd, posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors);
int complexSize = gridx*gridy*(gridz/2+1);
int complexStart = max(1, ((index*complexSize)/numThreads));
int complexEnd = (((index+1)*complexSize)/numThreads);
spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter);
threads.syncThreads();
int numGrids = tempGrid.size();
for (int i = gridStart; i < gridEnd; i += 4) {
......@@ -540,9 +541,9 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
threadEnergy[index] = reciprocalEnergy(gridxStart, gridxEnd, complexGrid, gridx, gridy, gridz, alpha, bsplineModuli, periodicBoxVectors, recipBoxVectors);
threads.syncThreads();
}
reciprocalConvolution(gridxStart, gridxEnd, complexGrid, gridx, gridy, gridz, recipEterm);
reciprocalConvolution(complexStart, complexEnd, complexGrid, recipEterm);
threads.syncThreads();
interpolateForces(particleStart, particleEnd, posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors);
interpolateForces(posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter);
}
void CpuCalcPmeReciprocalForceKernel::beginComputation(IO& io, const Vec3* periodicBoxVectors, bool includeEnergy) {
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2014 Stanford University and the Authors. *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -36,6 +36,7 @@
#include "internal/windowsExportPme.h"
#include "openmm/kernels.h"
#include "openmm/Vec3.h"
#include "openmm/internal/gmx_atomic.h"
#include "openmm/internal/ThreadPool.h"
#include <fftw3.h>
#include <pthread.h>
......@@ -130,6 +131,7 @@ private:
float* posq;
Vec3 periodicBoxVectors[3], recipBoxVectors[3];
bool includeEnergy;
gmx_atomic_t atomicCounter;
};
} // namespace OpenMM
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment