Merge pull request #1802 from peastman/deterministic

Added option for CPU PME to be deterministic

Merge pull request #1802 from peastman/deterministic
Added option for CPU PME to be deterministic
915b84f6 · peastman · GitHub · a05c1a9b · b5d4b517 · 915b84f6
Commit 915b84f6 authored May 04, 2017 by peastman Committed by GitHub May 04, 2017
9 changed files
--- a/olla/include/openmm/kernels.h
+++ b/olla/include/openmm/kernels.h
@@ -1295,8 +1295,9 @@ public:
     * @param gridz        the z size of the PME grid
     * @param numParticles the number of particles in the system
     * @param alpha        the Ewald blending parameter
+     * @param deterministic whether it should attempt to make the resulting forces deterministic
     */
-    virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) = 0;
+    virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha, bool deterministic) = 0;
    /**
     * Begin computing the force and energy.
     *
@@ -1368,8 +1369,9 @@ public:
     * @param gridz        the z size of the PME grid
     * @param numParticles the number of particles in the system
     * @param alpha        the Ewald blending parameter
+     * @param deterministic whether it should attempt to make the resulting forces deterministic
     */
-    virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) = 0;
+    virtual void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha, bool deterministic) = 0;
    /**
     * Begin computing the force and energy.
     *

--- a/platforms/cpu/include/CpuPlatform.h
+++ b/platforms/cpu/include/CpuPlatform.h
@@ -68,6 +68,15 @@ public:
        static const std::string key = "Threads";
        return key;
    }
+    /**
+     * This is the name of the parameter for requesting that force computations be deterministic.  Setting
+     * this to "true" DOES NOT GUARANTEE that the forces will actually be fully deterministic, but it does
+     * try to reduce the variation in them at the cost of a small loss in performance.
+     */
+    static const std::string& CpuDeterministicForces() {
+        static const std::string key = "DeterministicForces";
+        return key;
+    }
    /**
     * We cannot use the standard mechanism for platform data, because that is already used by the superclass.
     * Instead, we maintain a table of ContextImpls to PlatformDatas.
@@ -80,7 +89,7 @@ private:
 class CpuPlatform::PlatformData {
 public:
-    PlatformData(int numParticles, int numThreads);
+    PlatformData(int numParticles, int numThreads, bool deterministicForces);
    ~PlatformData();
    void requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const std::vector<std::set<int> >& exclusionList);
    AlignedArray<float> posq;
@@ -91,7 +100,7 @@ public:
    std::map<std::string, std::string> propertyValues;
    CpuNeighborList* neighborList;
    double cutoff, paddedCutoff;
-    bool anyExclusions;
+    bool anyExclusions, deterministicForces;
    std::vector<std::set<int> > exclusions;
 };

--- a/platforms/cpu/src/CpuKernels.cpp
+++ b/platforms/cpu/src/CpuKernels.cpp
@@ -642,7 +642,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
            useOptimizedPme = getPlatform().supportsKernels(kernelNames);
            if (useOptimizedPme) {
                optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
-                optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
+                optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha, data.deterministicForces);
            }
        }
        if (nonbondedMethod == LJPME) {
@@ -653,10 +653,10 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
            useOptimizedPme = getPlatform().supportsKernels(kernelNames);
            if (useOptimizedPme) {
                optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
-                optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
+                optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha, data.deterministicForces);
                optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
                optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
-                                                                                                  dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
+                                                                                                  dispersionGridSize[2], numParticles, ewaldDispersionAlpha, data.deterministicForces);
            }
        }
    }

--- a/platforms/cpu/src/CpuPlatform.cpp
+++ b/platforms/cpu/src/CpuPlatform.cpp
@@ -37,6 +37,7 @@
 #include "openmm/OpenMMException.h"
 #include "openmm/internal/hardware.h"
 #include "openmm/internal/vectorize.h"
+#include <algorithm>
 #include <sstream>
 #include <stdlib.h>
@@ -74,6 +75,7 @@ CpuPlatform::CpuPlatform() {
    registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
    platformProperties.push_back(CpuThreads());
+    platformProperties.push_back(CpuDeterministicForces());
    int threads = getNumProcessors();
    char* threadsEnv = getenv("OPENMM_CPU_THREADS");
    if (threadsEnv != NULL)
@@ -81,6 +83,7 @@ CpuPlatform::CpuPlatform() {
    stringstream defaultThreads;
    defaultThreads << threads;
    setPropertyDefaultValue(CpuThreads(), defaultThreads.str());
+    setPropertyDefaultValue(CpuDeterministicForces(), "false");
 }
 const string& CpuPlatform::getPropertyValue(const Context& context, const string& property) const {
@@ -111,9 +114,13 @@ void CpuPlatform::contextCreated(ContextImpl& context, const map<string, string>
    ReferencePlatform::contextCreated(context, properties);
    const string& threadsPropValue = (properties.find(CpuThreads()) == properties.end() ?
            getPropertyDefaultValue(CpuThreads()) : properties.find(CpuThreads())->second);
+    string deterministicForcesValue = (properties.find(CpuDeterministicForces()) == properties.end() ?
+            getPropertyDefaultValue(CpuDeterministicForces()) : properties.find(CpuDeterministicForces())->second);
    int numThreads;
    stringstream(threadsPropValue) >> numThreads;
-    PlatformData* data = new PlatformData(context.getSystem().getNumParticles(), numThreads);
+    transform(deterministicForcesValue.begin(), deterministicForcesValue.end(), deterministicForcesValue.begin(), ::tolower);
+    bool deterministicForces = (deterministicForcesValue == "true");
+    PlatformData* data = new PlatformData(context.getSystem().getNumParticles(), numThreads, deterministicForces);
    contextData[&context] = data;
    ReferenceConstraints& constraints = *(ReferenceConstraints*) reinterpret_cast<ReferencePlatform::PlatformData*>(context.getPlatformData())->constraints;
    if (constraints.settle != NULL) {
@@ -139,8 +146,8 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl&
    return *contextData[&context];
 }
-CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads) : posq(4*numParticles), threads(numThreads),
+CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads, bool deterministicForces) : posq(4*numParticles), threads(numThreads),
-        neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false) {
+        deterministicForces(deterministicForces), neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false) {
    numThreads = threads.getNumThreads();
    threadForce.resize(numThreads);
    for (int i = 0; i < numThreads; i++)
@@ -149,6 +156,7 @@ CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads) : posq
    stringstream threadsProperty;
    threadsProperty << numThreads;
    propertyValues[CpuThreads()] = threadsProperty.str();
+    propertyValues[CpuDeterministicForces()] = deterministicForces ? "true" : "false";
 }
 CpuPlatform::PlatformData::~PlatformData() {

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -1805,7 +1805,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                try {
                    cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
-                    cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
+                    cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha, cu.getPlatformData().deterministicForces);
                    CUfunction addForcesKernel = cu.getKernel(module, "addForces");
                    pmeio = new PmeIO(cu, addForcesKernel);
                    cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -1780,7 +1780,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
                try {
                    cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
-                    cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
+                    cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha, false);
                    cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
                    cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
                    pmeio = new PmeIO(cl, addForcesKernel);

--- a/plugins/cpupme/src/CpuPmeKernels.cpp
+++ b/plugins/cpupme/src/CpuPmeKernels.cpp
@@ -51,7 +51,8 @@ static const int PME_ORDER = 5;
 bool CpuCalcDispersionPmeReciprocalForceKernel::hasInitializedThreads = false;
 int CpuCalcDispersionPmeReciprocalForceKernel::numThreads = 0;
-static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, gmx_atomic_t& atomicCounter, const float epsilonFactor) {
+static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors,
+        gmx_atomic_t& atomicCounter, const float epsilonFactor, int threadIndex, int numThreads, bool deterministic) {
    float temp[4];
    fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
    fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0);
@@ -65,8 +66,10 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
    float posInBox[4] = {0,0,0,0};
    memset(grid, 0, sizeof(float)*gridx*gridy*gridz);
+    int i = threadIndex;
    while (true) {
-        int i = gmx_atomic_fetch_add(&atomicCounter, 1);
+        if (!deterministic)
+            i = gmx_atomic_fetch_add(&atomicCounter, 1);
        if (i >= numParticles)
            break;
@@ -151,6 +154,8 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
                }
            }
        }
+        if (deterministic)
+            i += numThreads;
    }
 }
@@ -406,7 +411,7 @@ static void* threadBody(void* args) {
    return 0;
 }
-void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha) {
+void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha, bool deterministic) {
    if (!hasInitializedThreads) {
        numThreads = getNumProcessors();
        char* threadsEnv = getenv("OPENMM_CPU_THREADS");
@@ -421,6 +426,7 @@ void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize
    gridz = findFFTDimension(zsize, true);
    this->numParticles = numParticles;
    this->alpha = alpha;
+    this->deterministic = deterministic;
    force.resize(4*numParticles);
    recipEterm.resize(gridx*gridy*gridz);
@@ -580,7 +586,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
    int complexStart = std::max(1, ((index*complexSize)/numThreads));
    int complexEnd = (((index+1)*complexSize)/numThreads);
    const float epsilonFactor = sqrt(ONE_4PI_EPS0);
-    spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor);
+    spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, index, numThreads, deterministic);
    threads.syncThreads();
    int numGrids = tempGrid.size();
    for (int i = gridStart; i < gridEnd; i += 4) {
@@ -696,7 +702,7 @@ static void* dispersionThreadBody(void* args) {
    return 0;
 }
-void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha) {
+void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha, bool deterministic) {
    if (!hasInitializedThreads) {
        numThreads = getNumProcessors();
        char* threadsEnv = getenv("OPENMM_CPU_THREADS");
@@ -711,6 +717,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::initialize(int xsize, int ysize,
    gridz = findFFTDimension(zsize, true);
    this->numParticles = numParticles;
    this->alpha = alpha;
+    this->deterministic = deterministic;
    force.resize(4*numParticles);
    recipEterm.resize(gridx*gridy*gridz);
@@ -871,7 +878,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runWorkerThread(ThreadPool& thre
    int complexStart = std::max(1, ((index*complexSize)/numThreads));
    int complexEnd = (((index+1)*complexSize)/numThreads);
    const float epsilonFactor = 1.0f;
-    spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor);
+    spreadCharge(posq, tempGrid[index], gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, index, numThreads, deterministic);
    threads.syncThreads();
    int numGrids = tempGrid.size();
    for (int i = gridStart; i < gridEnd; i += 4) {

--- a/plugins/cpupme/src/CpuPmeKernels.h
+++ b/plugins/cpupme/src/CpuPmeKernels.h
@@ -62,8 +62,9 @@ public:
     * @param gridz        the z size of the PME grid
     * @param numParticles the number of particles in the system
     * @param alpha        the Ewald blending parameter
+     * @param deterministic whether it should attempt to make the resulting forces deterministic
     */
-    void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha);
+    void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha, bool deterministic);
    ~CpuCalcPmeReciprocalForceKernel();
    /**
     * Begin computing the force and energy.
@@ -110,6 +111,7 @@ private:
    static int numThreads;
    int gridx, gridy, gridz, numParticles;
    double alpha;
+    bool deterministic;
    bool hasCreatedPlan, isFinished, isDeleted;
    std::vector<float> force;
    std::vector<float> bsplineModuli[3];
@@ -153,8 +155,9 @@ public:
     * @param gridz        the z size of the PME grid
     * @param numParticles the number of particles in the system
     * @param alpha        the Ewald blending parameter
+     * @param deterministic whether it should attempt to make the resulting forces deterministic
     */
-    void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha);
+    void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha, bool deterministic);
    ~CpuCalcDispersionPmeReciprocalForceKernel();
    /**
     * Begin computing the force and energy.
@@ -202,6 +205,7 @@ private:
    static int numThreads;
    int gridx, gridy, gridz, numParticles;
    double alpha;
+    bool deterministic;
    bool hasCreatedPlan, isFinished, isDeleted;
    std::vector<float> force;
    std::vector<float> bsplineModuli[3];

--- a/plugins/cpupme/tests/TestCpuPme.cpp
+++ b/plugins/cpupme/tests/TestCpuPme.cpp
@@ -535,7 +535,7 @@ void test_water2_dpme_energies_forces_no_exclusions() {
        io.posq.push_back(c6);
        selfEwaldEnergy += dalpha6 * c6 * c6 / 12.0;
    }
-    pme.initialize(grid, grid, grid, NATOMS, dalpha);
+    pme.initialize(grid, grid, grid, NATOMS, dalpha, false);
    Vec3 boxVectors[3];
    system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
    pme.beginComputation(io, boxVectors, true);
@@ -626,7 +626,7 @@ void testPME(bool triclinic) {
        sumSquaredCharges += charge*charge;
    }
    double ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
-    pme.initialize(gridx, gridy, gridz, numParticles, alpha);
+    pme.initialize(gridx, gridy, gridz, numParticles, alpha, true);
    pme.beginComputation(io, boxVectors, true);
    double energy = pme.finishComputation(io);