Commit b5d4b517 authored by peastman's avatar peastman
Browse files

Added option for CPU PME to be deterministic

parent fbf193fe
......@@ -1295,8 +1295,9 @@ public:
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
* @param deterministic whether it should attempt to make the resulting forces deterministic
*/
virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) = 0;
virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha, bool deterministic) = 0;
/**
* Begin computing the force and energy.
*
......@@ -1368,8 +1369,9 @@ public:
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
* @param deterministic whether it should attempt to make the resulting forces deterministic
*/
virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) = 0;
virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha, bool deterministic) = 0;
/**
* Begin computing the force and energy.
*
......
......@@ -68,6 +68,15 @@ public:
static const std::string key = "Threads";
return key;
}
/**
* This is the name of the parameter for requesting that force computations be deterministic. Setting
* this to "true" DOES NOT GUARANTEE that the forces will actually be fully deterministic, but it does
* try to reduce the variation in them at the cost of a small loss in performance.
*/
static const std::string& CpuDeterministicForces() {
static const std::string key = "DeterministicForces";
return key;
}
/**
* We cannot use the standard mechanism for platform data, because that is already used by the superclass.
* Instead, we maintain a table of ContextImpls to PlatformDatas.
......@@ -80,7 +89,7 @@ private:
class CpuPlatform::PlatformData {
public:
PlatformData(int numParticles, int numThreads);
PlatformData(int numParticles, int numThreads, bool deterministicForces);
~PlatformData();
void requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const std::vector<std::set<int> >& exclusionList);
AlignedArray<float> posq;
......@@ -91,7 +100,7 @@ public:
std::map<std::string, std::string> propertyValues;
CpuNeighborList* neighborList;
double cutoff, paddedCutoff;
bool anyExclusions;
bool anyExclusions, deterministicForces;
std::vector<std::set<int> > exclusions;
};
......
......@@ -642,7 +642,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha, data.deterministicForces);
}
}
if (nonbondedMethod == LJPME) {
......@@ -653,10 +653,10 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha, data.deterministicForces);
optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
dispersionGridSize[2], numParticles, ewaldDispersionAlpha, data.deterministicForces);
}
}
}
......
......@@ -37,6 +37,7 @@
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h"
#include <algorithm>
#include <sstream>
#include <stdlib.h>
......@@ -74,6 +75,7 @@ CpuPlatform::CpuPlatform() {
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
platformProperties.push_back(CpuThreads());
platformProperties.push_back(CpuDeterministicForces());
int threads = getNumProcessors();
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL)
......@@ -81,6 +83,7 @@ CpuPlatform::CpuPlatform() {
stringstream defaultThreads;
defaultThreads << threads;
setPropertyDefaultValue(CpuThreads(), defaultThreads.str());
setPropertyDefaultValue(CpuDeterministicForces(), "false");
}
const string& CpuPlatform::getPropertyValue(const Context& context, const string& property) const {
......@@ -111,9 +114,13 @@ void CpuPlatform::contextCreated(ContextImpl& context, const map<string, string>
ReferencePlatform::contextCreated(context, properties);
const string& threadsPropValue = (properties.find(CpuThreads()) == properties.end() ?
getPropertyDefaultValue(CpuThreads()) : properties.find(CpuThreads())->second);
string deterministicForcesValue = (properties.find(CpuDeterministicForces()) == properties.end() ?
getPropertyDefaultValue(CpuDeterministicForces()) : properties.find(CpuDeterministicForces())->second);
int numThreads;
stringstream(threadsPropValue) >> numThreads;
PlatformData* data = new PlatformData(context.getSystem().getNumParticles(), numThreads);
transform(deterministicForcesValue.begin(), deterministicForcesValue.end(), deterministicForcesValue.begin(), ::tolower);
bool deterministicForces = (deterministicForcesValue == "true");
PlatformData* data = new PlatformData(context.getSystem().getNumParticles(), numThreads, deterministicForces);
contextData[&context] = data;
ReferenceConstraints& constraints = *(ReferenceConstraints*) reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData())->constraints;
if (constraints.settle != NULL) {
......@@ -139,8 +146,8 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl&
return *contextData[&context];
}
CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads) : posq(4*numParticles), threads(numThreads),
neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false) {
CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads, bool deterministicForces) : posq(4*numParticles), threads(numThreads),
deterministicForces(deterministicForces), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false) {
numThreads = threads.getNumThreads();
threadForce.resize(numThreads);
for (int i = 0; i < numThreads; i++)
......@@ -149,6 +156,7 @@ CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads) : posq
stringstream threadsProperty;
threadsProperty << numThreads;
propertyValues[CpuThreads()] = threadsProperty.str();
propertyValues[CpuDeterministicForces()] = deterministicForces ? "true" : "false";
}
CpuPlatform::PlatformData::~PlatformData() {
......
......@@ -1805,7 +1805,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha, cu.getPlatformData().deterministicForces);
CUfunction addForcesKernel = cu.getKernel(module, "addForces");
pmeio = new PmeIO(cu, addForcesKernel);
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
......
......@@ -1780,7 +1780,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha, false);
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
pmeio = new PmeIO(cl, addForcesKernel);
......
......@@ -51,7 +51,8 @@ static const int PME_ORDER = 5;
bool CpuCalcDispersionPmeReciprocalForceKernel::hasInitializedThreads = false;
int CpuCalcDispersionPmeReciprocalForceKernel::numThreads = 0;
static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, gmx_atomic_t& atomicCounter, const float epsilonFactor) {
static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors,
gmx_atomic_t& atomicCounter, const float epsilonFactor, int threadIndex, int numThreads, bool deterministic) {
float temp[4];
fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0);
......@@ -65,8 +66,10 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
float posInBox[4] = {0,0,0,0};
memset(grid, 0, sizeof(float)*gridx*gridy*gridz);
int i = threadIndex;
while (true) {
int i = gmx_atomic_fetch_add(&atomicCounter, 1);
if (!deterministic)
i = gmx_atomic_fetch_add(&atomicCounter, 1);
if (i >= numParticles)
break;
......@@ -151,6 +154,8 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
}
}
}
if (deterministic)
i += numThreads;
}
}
......@@ -406,7 +411,7 @@ static void* threadBody(void* args) {
return 0;
}
void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha) {
void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha, bool deterministic) {
if (!hasInitializedThreads) {
numThreads = getNumProcessors();
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
......@@ -421,6 +426,7 @@ void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize
gridz = findFFTDimension(zsize, true);
this->numParticles = numParticles;
this->alpha = alpha;
this->deterministic = deterministic;
force.resize(4*numParticles);
recipEterm.resize(gridx*gridy*gridz);
......@@ -580,7 +586,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
int complexStart = std::max(1, ((index*complexSize)/numThreads));
int complexEnd = (((index+1)*complexSize)/numThreads);
const float epsilonFactor = sqrt(ONE_4PI_EPS0);
spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor);
spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, index, numThreads, deterministic);
threads.syncThreads();
int numGrids = tempGrid.size();
for (int i = gridStart; i < gridEnd; i += 4) {
......@@ -696,7 +702,7 @@ static void* dispersionThreadBody(void* args) {
return 0;
}
void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha) {
void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha, bool deterministic) {
if (!hasInitializedThreads) {
numThreads = getNumProcessors();
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
......@@ -711,6 +717,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize,
gridz = findFFTDimension(zsize, true);
this->numParticles = numParticles;
this->alpha = alpha;
this->deterministic = deterministic;
force.resize(4*numParticles);
recipEterm.resize(gridx*gridy*gridz);
......@@ -871,7 +878,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runWorkerThread(ThreadPool& thre
int complexStart = std::max(1, ((index*complexSize)/numThreads));
int complexEnd = (((index+1)*complexSize)/numThreads);
const float epsilonFactor = 1.0f;
spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor);
spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, index, numThreads, deterministic);
threads.syncThreads();
int numGrids = tempGrid.size();
for (int i = gridStart; i < gridEnd; i += 4) {
......
......@@ -62,8 +62,9 @@ public:
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
* @param deterministic whether it should attempt to make the resulting forces deterministic
*/
void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha);
void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha, bool deterministic);
~CpuCalcPmeReciprocalForceKernel();
/**
* Begin computing the force and energy.
......@@ -110,6 +111,7 @@ private:
static int numThreads;
int gridx, gridy, gridz, numParticles;
double alpha;
bool deterministic;
bool hasCreatedPlan, isFinished, isDeleted;
std::vector<float> force;
std::vector<float> bsplineModuli[3];
......@@ -153,8 +155,9 @@ public:
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
* @param deterministic whether it should attempt to make the resulting forces deterministic
*/
void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha);
void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha, bool deterministic);
~CpuCalcDispersionPmeReciprocalForceKernel();
/**
* Begin computing the force and energy.
......@@ -202,6 +205,7 @@ private:
static int numThreads;
int gridx, gridy, gridz, numParticles;
double alpha;
bool deterministic;
bool hasCreatedPlan, isFinished, isDeleted;
std::vector<float> force;
std::vector<float> bsplineModuli[3];
......
......@@ -535,7 +535,7 @@ void test_water2_dpme_energies_forces_no_exclusions() {
io.posq.push_back(c6);
selfEwaldEnergy += dalpha6 * c6 * c6 / 12.0;
}
pme.initialize(grid, grid, grid, NATOMS, dalpha);
pme.initialize(grid, grid, grid, NATOMS, dalpha, false);
Vec3 boxVectors[3];
system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
pme.beginComputation(io, boxVectors, true);
......@@ -626,7 +626,7 @@ void testPME(bool triclinic) {
sumSquaredCharges += charge*charge;
}
double ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
pme.initialize(gridx, gridy, gridz, numParticles, alpha);
pme.initialize(gridx, gridy, gridz, numParticles, alpha, true);
pme.beginComputation(io, boxVectors, true);
double energy = pme.finishComputation(io);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment