Merge branch 'master' into nucleic

fd473eea · Peter Eastman · 0a751b5b · 6a985cfd · fd473eea · fd473eea
Commit fd473eea authored Oct 29, 2015 by Peter Eastman
20 changed files
--- a/platforms/cpu/include/CpuNeighborList.h
+++ b/platforms/cpu/include/CpuNeighborList.h
@@ -35,6 +35,7 @@
 #include "AlignedArray.h"
 #include "RealVec.h"
 #include "windowsExportCpu.h"
+#include "openmm/internal/gmx_atomic.h"
 #include "openmm/internal/ThreadPool.h"
 #include <set>
 #include <utility>
@@ -74,6 +75,7 @@ private:
    int numAtoms;
    bool usePeriodic;
    float maxDistance;
+    gmx_atomic_t atomicCounter;
 };

 } // namespace OpenMM

--- a/platforms/cpu/sharedTarget/CMakeLists.txt
+++ b/platforms/cpu/sharedTarget/CMakeLists.txt
@@ -18,6 +18,6 @@ ENDFOREACH(file)
 ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})

 TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${PTHREADS_LIB})
-SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_SHARED_LIBRARY")
+SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_SHARED_LIBRARY")

 INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
--- a/platforms/cpu/src/CpuCustomGBForce.cpp
+++ b/platforms/cpu/src/CpuCustomGBForce.cpp
@@ -28,7 +28,7 @@
 #include "SimTKOpenMMUtilities.h"
 #include "ReferenceForce.h"
 #include "CpuCustomGBForce.h"
-#include "gmx_atomic.h"
+#include "openmm/internal/gmx_atomic.h"

 using namespace OpenMM;
 using namespace std;

--- a/platforms/cpu/src/CpuCustomManyParticleForce.cpp
+++ b/platforms/cpu/src/CpuCustomManyParticleForce.cpp
@@ -32,7 +32,7 @@
 #include "ReferenceTabulatedFunction.h"
 #include "openmm/internal/CustomManyParticleForceImpl.h"
 #include "lepton/CustomFunction.h"
-#include "gmx_atomic.h"
+#include "openmm/internal/gmx_atomic.h"

 using namespace OpenMM;
 using namespace std;

--- a/platforms/cpu/src/CpuCustomNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuCustomNonbondedForce.cpp
@@ -28,7 +28,7 @@
 #include "SimTKOpenMMUtilities.h"
 #include "ReferenceForce.h"
 #include "CpuCustomNonbondedForce.h"
-#include "gmx_atomic.h"
+#include "openmm/internal/gmx_atomic.h"

 using namespace OpenMM;
 using namespace std;

--- a/platforms/cpu/src/CpuGBSAOBCForce.cpp
+++ b/platforms/cpu/src/CpuGBSAOBCForce.cpp
@@ -25,7 +25,7 @@
 #include "CpuGBSAOBCForce.h"
 #include "SimTKOpenMMRealType.h"
 #include "openmm/internal/vectorize.h"
-#include "gmx_atomic.h"
+#include "openmm/internal/gmx_atomic.h"
 #include <algorithm>
 #include <cmath>
 #include <cstdlib>
@@ -279,7 +279,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
            fvec4 r = sqrt(r2);
            fvec4 alpha2_ij = radii*bornRadii[atomJ];
            fvec4 D_ij = r2/(4.0f*alpha2_ij);
-            fvec4 expTerm(expf(-D_ij[0]), expf(-D_ij[1]), expf(-D_ij[2]), expf(-D_ij[3]));
+            fvec4 expTerm = exp(-D_ij);
            fvec4 denominator2 = r2 + alpha2_ij*expTerm;
            fvec4 denominator = sqrt(denominator2);
            fvec4 Gpol = (partialChargeI*posJ[3])/denominator; 

--- a/platforms/cpu/src/CpuKernelFactory.cpp
+++ b/platforms/cpu/src/CpuKernelFactory.cpp
@@ -41,6 +41,8 @@ KernelImpl* CpuKernelFactory::createKernelImpl(std::string name, const Platform&
    CpuPlatform::PlatformData& data = CpuPlatform::getPlatformData(context);
    if (name == CalcForcesAndEnergyKernel::Name())
        return new CpuCalcForcesAndEnergyKernel(name, platform, data, context);
+    if (name == CalcHarmonicAngleForceKernel::Name())
+        return new CpuCalcHarmonicAngleForceKernel(name, platform, data);
    if (name == CalcPeriodicTorsionForceKernel::Name())
        return new CpuCalcPeriodicTorsionForceKernel(name, platform, data);
    if (name == CalcRBTorsionForceKernel::Name())

--- a/platforms/cpu/src/CpuKernels.cpp
+++ b/platforms/cpu/src/CpuKernels.cpp
@@ -30,6 +30,7 @@
 * -------------------------------------------------------------------------- */

 #include "CpuKernels.h"
+#include "ReferenceAngleBondIxn.h"
 #include "ReferenceBondForce.h"
 #include "ReferenceConstraints.h"
 #include "ReferenceKernelFactory.h"
@@ -47,6 +48,7 @@
 #include "RealVec.h"
 #include "lepton/CompiledExpression.h"
 #include "lepton/CustomFunction.h"
+#include "lepton/Operation.h"
 #include "lepton/Parser.h"
 #include "lepton/ParsedExpression.h"

@@ -83,6 +85,17 @@ static ReferenceConstraints& extractConstraints(ContextImpl& context) {
    return *(ReferenceConstraints*) data->constraints;
 }

+/**
+ * Recursively check that every VARIABLE node in an expression tree names an entry of the variables set; throws OpenMMException for the first one that does not.
+ */
+static void validateVariables(const Lepton::ExpressionTreeNode& node, const set<string>& variables) {
+    const Lepton::Operation& op = node.getOperation();
+    if (op.getId() == Lepton::Operation::VARIABLE && variables.find(op.getName()) == variables.end()) // only variable nodes are compared against the allowed names
+        throw OpenMMException("Unknown variable in expression: "+op.getName());
+    for (int i = 0; i < (int) node.getChildren().size(); i++) // descend into every child subtree
+        validateVariables(node.getChildren()[i], variables);
+}
+
 /**
 * Compute the kinetic energy of the system, possibly shifting the velocities in time to account
 * for a leapfrog integrator.
@@ -240,6 +253,64 @@ double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, boo
    return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
 }

+CpuCalcHarmonicAngleForceKernel::~CpuCalcHarmonicAngleForceKernel() { // Releases the per-angle index and parameter arrays.
+    if (angleIndexArray != NULL) { // both arrays are freed under this single check on the index array
+        for (int i = 0; i < numAngles; i++) { // each row is a separately allocated array
+            delete[] angleIndexArray[i];
+            delete[] angleParamArray[i];
+        }
+        delete[] angleIndexArray; // then free the outer arrays of row pointers
+        delete[] angleParamArray;
+    }
+}
+
+void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) { // Copies every angle of force into kernel-held arrays and initializes bondForce with them.
+    numAngles = force.getNumAngles();
+    angleIndexArray = new int*[numAngles]; // one row of 3 particle indices per angle
+    for (int i = 0; i < numAngles; i++)
+        angleIndexArray[i] = new int[3];
+    angleParamArray = new RealOpenMM*[numAngles]; // one row of 2 parameters per angle: [0] = angle, [1] = k
+    for (int i = 0; i < numAngles; i++)
+        angleParamArray[i] = new RealOpenMM[2];
+    for (int i = 0; i < numAngles; ++i) { // fill both tables from the force definition
+        int particle1, particle2, particle3;
+        double angle, k;
+        force.getAngleParameters(i, particle1, particle2, particle3, angle, k);
+        angleIndexArray[i][0] = particle1;
+        angleIndexArray[i][1] = particle2;
+        angleIndexArray[i][2] = particle3;
+        angleParamArray[i][0] = (RealOpenMM) angle;
+        angleParamArray[i][1] = (RealOpenMM) k;
+    }
+    bondForce.initialize(system.getNumParticles(), numAngles, 3, angleIndexArray, data.threads); // NOTE(review): the literal 3 presumably is the number of particles per angle row; confirm against the bondForce initialize signature
+}
+
+double CpuCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { // Evaluates the angle terms through bondForce and returns the accumulated energy.
+    vector<RealVec>& posData = extractPositions(context);
+    vector<RealVec>& forceData = extractForces(context);
+    RealOpenMM energy = 0; // returned unchanged (0) when includeEnergy is false
+    ReferenceAngleBondIxn angleBond;
+    bondForce.calculateForce(posData, angleParamArray, forceData, includeEnergy ? &energy : NULL, angleBond); // the energy pointer is NULL unless energy was requested; includeForces is not consulted here
+    return energy;
+}
+
+void CpuCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) { // Refreshes the stored angle and k values from force; throws OpenMMException if the angle count or any particle index differs.
+    if (numAngles != force.getNumAngles()) // the tables were sized in initialize(), so the count may not change
+        throw OpenMMException("updateParametersInContext: The number of angles has changed");
+
+    // Record the values.
+
+    for (int i = 0; i < numAngles; ++i) {
+        int particle1, particle2, particle3;
+        double angle, k;
+        force.getAngleParameters(i, particle1, particle2, particle3, angle, k);
+        if (particle1 != angleIndexArray[i][0] || particle2 != angleIndexArray[i][1] || particle3 != angleIndexArray[i][2]) // only the parameters may be updated, not which particles form the angle
+            throw OpenMMException("updateParametersInContext: The set of particles in an angle has changed");
+        angleParamArray[i][0] = (RealOpenMM) angle;
+        angleParamArray[i][1] = (RealOpenMM) k;
+    }
+}
+
 CpuCalcPeriodicTorsionForceKernel::~CpuCalcPeriodicTorsionForceKernel() {
    if (torsionIndexArray != NULL) {
        for (int i = 0; i < numTorsions; i++) {
@@ -467,6 +538,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
        bonded14ParamArray[i][1] = static_cast<RealOpenMM>(4.0*depth);
        bonded14ParamArray[i][2] = static_cast<RealOpenMM>(charge);
    }
+    bondForce.initialize(system.getNumParticles(), num14, 2, bonded14IndexArray, data.threads);
    
    // Record other parameters.
    
@@ -527,7 +599,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
    if (nonbondedMethod != NoCutoff) {
        // Determine whether we need to recompute the neighbor list.
        
-        double padding = 0.15*nonbondedCutoff;
+        double padding = 0.25*nonbondedCutoff;
        bool needRecompute = false;
        double closeCutoff2 = 0.25*padding*padding;
        double farCutoff2 = 0.5*padding*padding;
@@ -599,9 +671,8 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
    }
    energy += nonbondedEnergy;
    if (includeDirect) {
-        ReferenceBondForce refBondForce;
        ReferenceLJCoulomb14 nonbonded14;
-        refBondForce.calculateForce(num14, bonded14IndexArray, posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
+        bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
        if (data.isPeriodic)
            energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
    }
@@ -654,6 +725,19 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
        dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
 }

+void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const { // Writes the PME alpha and grid dimensions to the output references; throws OpenMMException when the nonbonded method is not PME.
+    if (nonbondedMethod != PME)
+        throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
+    if (useOptimizedPme) // delegate to the optimizedPme kernel when it is in use
+        optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
+    else { // otherwise report the values held by this kernel
+        alpha = ewaldAlpha;
+        nx = gridSize[0];
+        ny = gridSize[1];
+        nz = gridSize[2];
+    }
+}
+
 CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
            CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), neighborList(NULL), nonbonded(NULL) {
 }
@@ -724,6 +808,14 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
        globalParameterNames.push_back(force.getGlobalParameterName(i));
        globalParamValues[force.getGlobalParameterName(i)] = force.getGlobalParameterDefaultValue(i);
    }
+    set<string> variables;
+    variables.insert("r");
+    for (int i = 0; i < numParameters; i++) {
+        variables.insert(parameterNames[i]+"1");
+        variables.insert(parameterNames[i]+"2");
+    }
+    variables.insert(globalParameterNames.begin(), globalParameterNames.end());
+    validateVariables(expression.getRootNode(), variables);

    // Delete the custom functions.

@@ -937,6 +1029,18 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
    vector<vector<Lepton::CompiledExpression> > valueGradientExpressions(force.getNumComputedValues());
    vector<Lepton::CompiledExpression> valueExpressions;
    vector<Lepton::CompiledExpression> energyExpressions;
+    set<string> particleVariables, pairVariables;
+    pairVariables.insert("r");
+    particleVariables.insert("x");
+    particleVariables.insert("y");
+    particleVariables.insert("z");
+    for (int i = 0; i < numPerParticleParameters; i++) {
+        particleVariables.insert(particleParameterNames[i]);
+        pairVariables.insert(particleParameterNames[i]+"1");
+        pairVariables.insert(particleParameterNames[i]+"2");
+    }
+    particleVariables.insert(globalParameterNames.begin(), globalParameterNames.end());
+    pairVariables.insert(globalParameterNames.begin(), globalParameterNames.end());
    for (int i = 0; i < force.getNumComputedValues(); i++) {
        string name, expression;
        CustomGBForce::ComputationType type;
@@ -945,15 +1049,21 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
        valueExpressions.push_back(ex.createCompiledExpression());
        valueTypes.push_back(type);
        valueNames.push_back(name);
-        if (i == 0)
+        if (i == 0) {
            valueDerivExpressions[i].push_back(ex.differentiate("r").createCompiledExpression());
+            validateVariables(ex.getRootNode(), pairVariables);
+        }
        else {
            valueGradientExpressions[i].push_back(ex.differentiate("x").createCompiledExpression());
            valueGradientExpressions[i].push_back(ex.differentiate("y").createCompiledExpression());
            valueGradientExpressions[i].push_back(ex.differentiate("z").createCompiledExpression());
            for (int j = 0; j < i; j++)
                valueDerivExpressions[i].push_back(ex.differentiate(valueNames[j]).createCompiledExpression());
+            validateVariables(ex.getRootNode(), particleVariables);
        }
+        particleVariables.insert(name);
+        pairVariables.insert(name+"1");
+        pairVariables.insert(name+"2");
    }

    // Parse the expressions for energy terms.
@@ -975,10 +1085,12 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
                energyGradientExpressions[i].push_back(ex.differentiate("x").createCompiledExpression());
                energyGradientExpressions[i].push_back(ex.differentiate("y").createCompiledExpression());
                energyGradientExpressions[i].push_back(ex.differentiate("z").createCompiledExpression());
+                validateVariables(ex.getRootNode(), particleVariables);
            }
            else {
                energyDerivExpressions[i].push_back(ex.differentiate(valueNames[j]+"1").createCompiledExpression());
                energyDerivExpressions[i].push_back(ex.differentiate(valueNames[j]+"2").createCompiledExpression());
+                validateVariables(ex.getRootNode(), pairVariables);
            }
        }
    }

--- a/platforms/cpu/src/CpuLangevinDynamics.cpp
+++ b/platforms/cpu/src/CpuLangevinDynamics.cpp

-/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
 * Authors: Peter Eastman
 * Contributors: 
 *
@@ -49,6 +49,16 @@ public:
    CpuLangevinDynamics& owner;
 };

+class CpuLangevinDynamics::Update3Task : public ThreadPool::Task { // Thread pool task that forwards each worker to CpuLangevinDynamics::threadUpdate3().
+public:
+    Update3Task(CpuLangevinDynamics& owner) : owner(owner) {
+    }
+    void execute(ThreadPool& threads, int threadIndex) { // the threads argument is unused here; only the index is forwarded
+        owner.threadUpdate3(threadIndex);
+    }
+    CpuLangevinDynamics& owner; // stored as a reference to the dynamics object that created the task
+};
+
 CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, RealOpenMM deltaT, RealOpenMM tau, RealOpenMM temperature, ThreadPool& threads, CpuRandom& random) : 
           ReferenceStochasticDynamics(numberOfAtoms, deltaT, tau, temperature), threads(threads), random(random) {
 }
@@ -92,6 +102,23 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<RealVec>& atomCo
    threads.waitForThreads();
 }

+void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<RealVec>& atomCoordinates, vector<RealVec>& velocities,
+                                       vector<RealOpenMM>& inverseMasses, vector<RealVec>& xPrime) { // Runs threadUpdate3() on the thread pool and blocks until all threads finish.
+    // Record the parameters for the threads.
+    
+    this->numberOfAtoms = numberOfAtoms;
+    this->atomCoordinates = &atomCoordinates[0]; // members keep raw pointers to the first element of each vector for indexed access in threadUpdate3()
+    this->velocities = &velocities[0];
+    this->inverseMasses = &inverseMasses[0];
+    this->xPrime = &xPrime[0];
+    
+    // Signal the threads to start running and wait for them to finish.
+    
+    Update3Task task(*this);
+    threads.execute(task);
+    threads.waitForThreads();
+}
+
 void CpuLangevinDynamics::threadUpdate1(int threadIndex) {
    const RealOpenMM tau = getTau();
    const RealOpenMM vscale = EXP(-getDeltaT()/tau);
@@ -122,3 +149,16 @@ void CpuLangevinDynamics::threadUpdate2(int threadIndex) {
        }
   }
 }
+
+void CpuLangevinDynamics::threadUpdate3(int threadIndex) { // For the atoms in the range of this thread: sets velocity from the position change over the step and commits xPrime as the new position.
+   const RealOpenMM invStepSize = 1.0/getDeltaT();
+    int start = threadIndex*numberOfAtoms/threads.getNumThreads(); // contiguous range [start, end) handled by this thread
+    int end = (threadIndex+1)*numberOfAtoms/threads.getNumThreads();
+
+   for (int i = start; i < end; ++i)
+       if (inverseMasses[i] != 0.0) { // atoms with zero inverse mass are left untouched
+            velocities[i] = (xPrime[i]-atomCoordinates[i])*invStepSize;
+            atomCoordinates[i] = xPrime[i];
+       }
+}
+
--- a/platforms/cpu/src/CpuNeighborList.cpp
+++ b/platforms/cpu/src/CpuNeighborList.cpp
@@ -59,22 +59,25 @@ public:
 */
 class CpuNeighborList::Voxels {
 public:
-    Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* periodicBoxVectors, bool usePeriodic) :
-            blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), periodicBoxVectors(periodicBoxVectors), usePeriodic(usePeriodic) {
-        periodicBoxSize[0] = (float) periodicBoxVectors[0][0];
-        periodicBoxSize[1] = (float) periodicBoxVectors[1][1];
-        periodicBoxSize[2] = (float) periodicBoxVectors[2][2];
-        recipBoxSize[0] = (float) (1/periodicBoxVectors[0][0]);
-        recipBoxSize[1] = (float) (1/periodicBoxVectors[1][1]);
-        recipBoxSize[2] = (float) (1/periodicBoxVectors[2][2]);
-        triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
-                     periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
-                     periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
+    Voxels(int blockSize, float vsy, float vsz, float miny, float maxy, float minz, float maxz, const RealVec* boxVectors, bool usePeriodic) :
+            blockSize(blockSize), voxelSizeY(vsy), voxelSizeZ(vsz), miny(miny), maxy(maxy), minz(minz), maxz(maxz), usePeriodic(usePeriodic) {
+        for (int i = 0; i < 3; i++)
+            for (int j = 0; j < 3; j++)
+                periodicBoxVectors[i][j] = (float) boxVectors[i][j];
+        periodicBoxSize[0] = (float) boxVectors[0][0];
+        periodicBoxSize[1] = (float) boxVectors[1][1];
+        periodicBoxSize[2] = (float) boxVectors[2][2];
+        recipBoxSize[0] = (float) (1/boxVectors[0][0]);
+        recipBoxSize[1] = (float) (1/boxVectors[1][1]);
+        recipBoxSize[2] = (float) (1/boxVectors[2][2]);
+        triclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
+                     boxVectors[1][0] != 0.0 || boxVectors[1][2] != 0.0 ||
+                     boxVectors[2][0] != 0.0 || boxVectors[2][1] != 0.0);
        if (usePeriodic) {
-            ny = (int) floorf(periodicBoxVectors[1][1]/voxelSizeY+0.5f);
-            nz = (int) floorf(periodicBoxVectors[2][2]/voxelSizeZ+0.5f);
-            voxelSizeY = periodicBoxVectors[1][1]/ny;
-            voxelSizeZ = periodicBoxVectors[2][2]/nz;
+            ny = (int) floorf(boxVectors[1][1]/voxelSizeY+0.5f);
+            nz = (int) floorf(boxVectors[2][2]/voxelSizeZ+0.5f);
+            voxelSizeY = boxVectors[1][1]/ny;
+            voxelSizeZ = boxVectors[2][2]/nz;
        }
        else {
            ny = max(1, (int) floorf((maxy-miny)/voxelSizeY+0.5f));
@@ -110,12 +113,10 @@ public:
    }
    
    /**
-     * Find the index of the first particle in voxel (y,z) whose x coordinate in >= the specified value.
+     * Find the index of the first particle in voxel (y,z) whose x coordinate is >= the specified value.
     */
-    int findLowerBound(int y, int z, double x) const {
+    int findLowerBound(int y, int z, double x, int lower, int upper) const {
        const vector<pair<float, int> >& bin = bins[y][z];
-        int lower = 0;
-        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first < x)
@@ -127,12 +128,10 @@ public:
    }
    
    /**
-     * Find the index of the first particle in voxel (y,z) whose x coordinate in greater than the specified value.
+     * Find the index of the first particle in voxel (y,z) whose x coordinate is greater than the specified value.
     */
-    int findUpperBound(int y, int z, double x) const {
+    int findUpperBound(int y, int z, double x, int lower, int upper) const {
        const vector<pair<float, int> >& bin = bins[y][z];
-        int lower = 0;
-        int upper = bin.size();
        while (lower < upper) {
            int middle = (lower+upper)/2;
            if (bin[middle].first > x)
@@ -208,7 +207,7 @@ public:

            // Loop over voxels along the y axis.

-            int boxz = (int) floor((float) z/nz);
+            float boxz = floor((float) z/nz);
            int starty = centerVoxelIndex.y-dIndexY;
            int endy = centerVoxelIndex.y+dIndexY;
            float yoffset = (float) (usePeriodic ? boxz*periodicBoxVectors[2][1] : 0);
@@ -225,7 +224,7 @@ public:
                voxelIndex.y = y;
                if (usePeriodic)
                    voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
-                int boxy = (int) floor((float) y/ny);
+                float boxy = floor((float) y/ny);
                float xoffset = (float) (usePeriodic ? boxy*periodicBoxVectors[1][0]+boxz*periodicBoxVectors[2][0] : 0);
                
                // Identify the range of atoms within this bin we need to search.  When using periodic boundary
@@ -261,30 +260,34 @@ public:
                int numRanges;
                int rangeStart[2];
                int rangeEnd[2];
-                rangeStart[0] = findLowerBound(voxelIndex.y, voxelIndex.z, minx);
+                int binSize = bins[voxelIndex.y][voxelIndex.z].size();
+                rangeStart[0] = findLowerBound(voxelIndex.y, voxelIndex.z, minx, 0, binSize);
                if (needPeriodic) {
                    numRanges = 2;
-                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx);
-                    if (rangeStart[0] > 0) {
+                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
+                    if (rangeStart[0] > 0 && rangeEnd[0] < binSize)
+                        numRanges = 1;
+                    else if (rangeStart[0] > 0) {
                        rangeStart[1] = 0;
-                        rangeEnd[1] = min(findUpperBound(voxelIndex.y, voxelIndex.z, maxx-periodicBoxSize[0]), rangeStart[0]);
+                        rangeEnd[1] = min(findUpperBound(voxelIndex.y, voxelIndex.z, maxx-periodicBoxSize[0], 0, rangeStart[0]), rangeStart[0]);
                    }
                    else {
-                        rangeStart[1] = max(findLowerBound(voxelIndex.y, voxelIndex.z, minx+periodicBoxSize[0]), rangeEnd[0]);
+                        rangeStart[1] = max(findLowerBound(voxelIndex.y, voxelIndex.z, minx+periodicBoxSize[0], rangeEnd[0], binSize), rangeEnd[0]);
                        rangeEnd[1] = bins[voxelIndex.y][voxelIndex.z].size();
                    }
                }
                else {
                    numRanges = 1;
-                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx);
+                    rangeEnd[0] = findUpperBound(voxelIndex.y, voxelIndex.z, maxx, rangeStart[0], binSize);
                }
                bool periodicRectangular = (needPeriodic && !triclinic);
                
                // Loop over atoms and check to see if they are neighbors of this block.
                
+                const vector<pair<float, int> >& voxelBins = bins[voxelIndex.y][voxelIndex.z];
                for (int range = 0; range < numRanges; range++) {
                    for (int item = rangeStart[range]; item < rangeEnd[range]; item++) {
-                        const int sortedIndex = bins[voxelIndex.y][voxelIndex.z][item].second;
+                        const int sortedIndex = voxelBins[item].second;

                        // Avoid duplicate entries.
                        if (sortedIndex >= lastSortedIndex)
@@ -361,7 +364,7 @@ private:
    int ny, nz;
    float periodicBoxSize[3], recipBoxSize[3];
    bool triclinic;
-    const RealVec* periodicBoxVectors;
+    float periodicBoxVectors[3][3];
    const bool usePeriodic;
    vector<vector<vector<pair<float, int> > > > bins;
 };
@@ -444,6 +447,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float

    // Signal the threads to start running and wait for them to finish.
    
+    gmx_atomic_set(&atomicCounter, 0);
    threads.resumeThreads();
    threads.waitForThreads();
    
@@ -500,7 +504,11 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
    vector<int> blockAtoms;
    vector<float> blockAtomX(blockSize), blockAtomY(blockSize), blockAtomZ(blockSize);
    vector<VoxelIndex> atomVoxelIndex;
-    for (int i = threadIndex; i < numBlocks; i += numThreads) {
+    while (true) {
+        int i = gmx_atomic_fetch_add(&atomicCounter, 1);
+        if (i >= numBlocks)
+            break;
+
        // Find the atoms in this block and compute their bounding box.
        
        int firstIndex = blockSize*i;
@@ -532,15 +540,25 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI

        // Record the exclusions for this block.

+        map<int, char> atomFlags;
        for (int j = 0; j < atomsInBlock; j++) {
            const set<int>& atomExclusions = (*exclusions)[sortedAtoms[firstIndex+j]];
            char mask = 1<<j;
-            for (int k = 0; k < (int) blockNeighbors[i].size(); k++) {
-                int atomIndex = blockNeighbors[i][k];
-                if (atomExclusions.find(atomIndex) != atomExclusions.end())
-                    blockExclusions[i][k] |= mask;
+            for (set<int>::const_iterator iter = atomExclusions.begin(); iter != atomExclusions.end(); ++iter) {
+                map<int, char>::iterator thisAtomFlags = atomFlags.find(*iter);
+                if (thisAtomFlags == atomFlags.end())
+                    atomFlags[*iter] = mask;
+                else
+                    thisAtomFlags->second |= mask;
            }
        }
+        int numNeighbors = blockNeighbors[i].size();
+        for (int k = 0; k < numNeighbors; k++) {
+            int atomIndex = blockNeighbors[i][k];
+            map<int, char>::iterator thisAtomFlags = atomFlags.find(atomIndex);
+            if (thisAtomFlags != atomFlags.end())
+                blockExclusions[i][k] |= thisAtomFlags->second;
+        }
    }
 }


--- a/platforms/cpu/src/CpuNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuNonbondedForce.cpp
@@ -28,7 +28,7 @@
 #include "CpuNonbondedForce.h"
 #include "ReferenceForce.h"
 #include "ReferencePME.h"
-#include "gmx_atomic.h"
+#include "openmm/internal/gmx_atomic.h"
 #include <algorithm>

 // In case we're using some primitive version of Visual Studio this will
@@ -322,6 +322,14 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
    threads.execute(task);
    threads.waitForThreads();
    
+    // Signal the threads to subtract the exclusions.
+    
+    if (ewald || pme) {
+        gmx_atomic_set(&counter, 0);
+        threads.resumeThreads();
+        threads.waitForThreads();
+    }
+    
    // Combine the energies from all the threads.
    
    if (totalEnergy != NULL) {
@@ -354,28 +362,37 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex

        // Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.

-        for (int i = threadIndex; i < numberOfAtoms; i += numThreads) {
-            fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
-            for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
-                if (*iter > i) {
-                    int j = *iter;
-                    fvec4 deltaR;
-                    fvec4 posJ((float) atomCoordinates[j][0], (float) atomCoordinates[j][1], (float) atomCoordinates[j][2], 0.0f);
-                    float r2;
-                    getDeltaR(posJ, posI, deltaR, r2, false, boxSize, invBoxSize);
-                    float r = sqrtf(r2);
-                    float inverseR = 1/r;
-                    float chargeProd = ONE_4PI_EPS0*posq[4*i+3]*posq[4*j+3];
-                    float alphaR = alphaEwald*r;
-                    float erfAlphaR = erf(alphaR);
-                    if (erfAlphaR > 1e-6f) {
-                        float dEdR = (float) (chargeProd * inverseR * inverseR * inverseR);
-                        dEdR = (float) (dEdR * (erfAlphaR-TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR)));
-                        fvec4 result = deltaR*dEdR;
-                        (fvec4(forces+4*i)-result).store(forces+4*i);
-                        (fvec4(forces+4*j)+result).store(forces+4*j);
-                        if (includeEnergy)
-                            threadEnergy[threadIndex] -= chargeProd*inverseR*erfAlphaR;
+        threads.syncThreads();
+        const int groupSize = max(1, numberOfAtoms/(10*numThreads));
+        while (true) {
+            int start = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), groupSize);
+            if (start >= numberOfAtoms)
+                break;
+            int end = min(start+groupSize, numberOfAtoms);
+            for (int i = start; i < end; i++) {
+               fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
+                float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]);
+                for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
+                    if (*iter > i) {
+                        int j = *iter;
+                        fvec4 deltaR;
+                        fvec4 posJ((float) atomCoordinates[j][0], (float) atomCoordinates[j][1], (float) atomCoordinates[j][2], 0.0f);
+                        float r2;
+                        getDeltaR(posJ, posI, deltaR, r2, false, boxSize, invBoxSize);
+                        float r = sqrtf(r2);
+                        float alphaR = alphaEwald*r;
+                        float erfAlphaR = erf(alphaR);
+                        if (erfAlphaR > 1e-6f) {
+                            float inverseR = 1/r;
+                            float chargeProdOverR = scaledChargeI*posq[4*j+3]*inverseR;
+                            float dEdR = chargeProdOverR*inverseR*inverseR;
+                            dEdR = dEdR * (erfAlphaR-(float)TWO_OVER_SQRT_PI*alphaR*(float)exp(-alphaR*alphaR));
+                            fvec4 result = deltaR*dEdR;
+                            (fvec4(forces+4*i)-result).store(forces+4*i);
+                            (fvec4(forces+4*j)+result).store(forces+4*j);
+                            if (includeEnergy)
+                                threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
+                        }
                    }
                }
            }

--- a/platforms/cpu/src/CpuPlatform.cpp
+++ b/platforms/cpu/src/CpuPlatform.cpp
@@ -61,6 +61,7 @@ map<const ContextImpl*, CpuPlatform::PlatformData*> CpuPlatform::contextData;
 CpuPlatform::CpuPlatform() {
    CpuKernelFactory* factory = new CpuKernelFactory();
    registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
+    registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
    registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
    registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);

--- a/platforms/cpu/src/CpuSETTLE.cpp
+++ b/platforms/cpu/src/CpuSETTLE.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -30,6 +30,7 @@
 * -------------------------------------------------------------------------- */

 #include "CpuSETTLE.h"
+#include "openmm/internal/gmx_atomic.h"

 using namespace OpenMM;
 using namespace std;
@@ -39,10 +40,14 @@ public:
    ApplyToPositionsTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& atomCoordinatesP, vector<RealOpenMM>& inverseMasses,
            RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), atomCoordinatesP(atomCoordinatesP),
            inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
+        gmx_atomic_set(&atomicCounter, 0); // initialize this task's atomic counter to zero at construction
    }
    void execute(ThreadPool& threads, int threadIndex) {
-        if (threadIndex < threadSettle.size()) {
-            threadSettle[threadIndex]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
+        while (true) { // keep going until the counter has moved past the last threadSettle entry
+            int index = gmx_atomic_fetch_add(&atomicCounter, 1); // claim the next entry; presumably yields the value before the increment -- confirm against gmx_atomic.h
+            if (index >= threadSettle.size()) // no entries left to claim
+                break;
+            threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance); // the entry is chosen by the counter, not by threadIndex
        }
    }
    vector<OpenMM::RealVec>& atomCoordinates;
@@ -50,6 +55,7 @@ public:
    vector<RealOpenMM>& inverseMasses;
    RealOpenMM tolerance;
    vector<ReferenceSETTLEAlgorithm*>& threadSettle;
+    gmx_atomic_t atomicCounter;
 };

 class CpuSETTLE::ApplyToVelocitiesTask : public ThreadPool::Task {
@@ -57,10 +63,14 @@ public:
    ApplyToVelocitiesTask(vector<OpenMM::RealVec>& atomCoordinates, vector<OpenMM::RealVec>& velocities, vector<RealOpenMM>& inverseMasses,
            RealOpenMM tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), velocities(velocities),
            inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
+        gmx_atomic_set(&atomicCounter, 0); // initialize this task's atomic counter to zero at construction
    }
    void execute(ThreadPool& threads, int threadIndex) {
-        if (threadIndex < threadSettle.size()) {
-            threadSettle[threadIndex]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
+        while (true) { // keep going until the counter has moved past the last threadSettle entry
+            int index = gmx_atomic_fetch_add(&atomicCounter, 1); // claim the next entry; presumably yields the value before the increment -- confirm against gmx_atomic.h
+            if (index >= threadSettle.size()) // no entries left to claim
+                break;
+            threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance); // the entry is chosen by the counter, not by threadIndex
        }
    }
    vector<OpenMM::RealVec>& atomCoordinates;
@@ -68,17 +78,18 @@ public:
    vector<RealOpenMM>& inverseMasses;
    RealOpenMM tolerance;
    vector<ReferenceSETTLEAlgorithm*>& threadSettle;
+    gmx_atomic_t atomicCounter;
 };

 CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) {
-    int numThreads = threads.getNumThreads();
+    int numBlocks = 10*threads.getNumThreads();
    int numClusters = settle.getNumClusters();
    vector<RealOpenMM> mass(system.getNumParticles());
    for (int i = 0; i < system.getNumParticles(); i++)
        mass[i] = system.getParticleMass(i);
-    for (int i = 0; i < numThreads; i++) {
-        int start = i*numClusters/numThreads;
-        int end = (i+1)*numClusters/numThreads;
+    for (int i = 0; i < numBlocks; i++) {
+        int start = i*numClusters/numBlocks;
+        int end = (i+1)*numClusters/numBlocks;
        if (start != end) {
            int numThreadClusters = end-start;
            vector<int> atom1(numThreadClusters), atom2(numThreadClusters), atom3(numThreadClusters);

--- a/platforms/cpu/staticTarget/CMakeLists.txt
+++ b/platforms/cpu/staticTarget/CMakeLists.txt
@@ -17,6 +17,6 @@ ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${AP

 TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}_static ${PTHREADS_LIB_STATIC})
 #-DPTW32_STATIC_LIB only works for the windows pthreads.
-SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")
+SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")

 INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
--- a/platforms/cpu/tests/CMakeLists.txt
+++ b/platforms/cpu/tests/CMakeLists.txt
@@ -23,7 +23,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
    ELSE (OPENMM_BUILD_SHARED_LIB)
        TARGET_LINK_LIBRARIES(${TEST_ROOT} ${STATIC_TARGET})
    ENDIF (OPENMM_BUILD_SHARED_LIB)
-    SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
+    SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS}")
    ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)

 ENDFOREACH(TEST_PROG ${TEST_PROGS})
--- a/platforms/cpu/tests/CpuTests.h
+++ b/platforms/cpu/tests/CpuTests.h
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2015 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#ifdef WIN32
+  #define _USE_MATH_DEFINES // Needed to get M_PI
+#endif
+#include "CpuPlatform.h"
+#include <cstdlib>
+#include <iostream>
+
+OpenMM::CpuPlatform platform;
+
+/**
+ * Test start-up hook for the CPU platform.  If CpuPlatform::isProcessorSupported()
+ * reports false, print a notice and end the program with exit status 0; otherwise
+ * return without doing anything.  The command line arguments are accepted but not used.
+ */
+void initializeTests(int argc, char* argv[]) {
+    if (OpenMM::CpuPlatform::isProcessorSupported())
+        return;
+    std::cout << "CPU is not supported.  Exiting." << std::endl;
+    std::exit(0);
+}
--- a/platforms/cpu/tests/TestCpuCheckpoints.cpp
+++ b/platforms/cpu/tests/TestCpuCheckpoints.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2012-2015 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "CpuTests.h"
+#include "TestCheckpoints.h"
+
+/**
+ * Check that a checkpoint written by a Context on the CPU platform restores the
+ * recorded state, and that continuing from the restored checkpoint gives the same
+ * result as the original run (both comparisons are done by compareStates()).
+ */
+void testCheckpoint() {
+    const int numParticles = 100;
+    const double boxSize = 5.0;
+    const double temperature = 200.0;
+    const double minSeparation = 0.1;
+    const int stateTypes = State::Positions | State::Velocities | State::Parameters;
+
+    // Assemble a System holding an AndersenThermostat and a periodic cutoff NonbondedForce.
+    System system;
+    system.addForce(new AndersenThermostat(0.0, 100.0));
+    NonbondedForce* nbForce = new NonbondedForce();
+    system.addForce(nbForce);
+    nbForce->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
+
+    // Scatter the particles at random inside the box, redrawing any position that
+    // lands closer than minSeparation to a particle placed earlier.
+    vector<Vec3> coords(numParticles);
+    OpenMM_SFMT::SFMT rng;
+    init_gen_rand(0, rng);
+    for (int i = 0; i < numParticles; i++) {
+        const double charge = (i%2 == 0 ? 0.1 : -0.1);
+        system.addParticle(1.0);
+        nbForce->addParticle(charge, 0.2, 0.1);
+        bool tooClose = true;
+        while (tooClose) {
+            tooClose = false;
+            coords[i] = Vec3(boxSize*genrand_real2(rng), boxSize*genrand_real2(rng), boxSize*genrand_real2(rng));
+            for (int j = 0; j < i; j++) {
+                Vec3 offset = coords[i]-coords[j];
+                if (sqrt(offset.dot(offset)) < minSeparation)
+                    tooClose = true;
+            }
+        }
+    }
+
+    // Create the simulation on the platform under test.
+    VerletIntegrator integrator(0.001);
+    Context context(system, integrator, platform);
+    context.setPositions(coords);
+    context.setPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
+    context.setParameter(AndersenThermostat::Temperature(), temperature);
+
+    // Advance the simulation, then capture both a State snapshot and a checkpoint.
+    integrator.step(100);
+    State atCheckpoint = context.getState(stateTypes);
+    stringstream checkpoint(ios_base::out | ios_base::in | ios_base::binary);
+    context.createCheckpoint(checkpoint);
+
+    // Keep stepping past the checkpoint and remember where the run ends up.
+    integrator.step(10);
+    State afterContinuing = context.getState(stateTypes);
+
+    // Change the box vectors and the temperature parameter on purpose, then reload
+    // the checkpoint; the result must match the snapshot taken when it was written.
+    const double doubledBox = 2*boxSize;
+    context.setPeriodicBoxVectors(Vec3(doubledBox, 0, 0), Vec3(0, doubledBox, 0), Vec3(0, 0, doubledBox));
+    context.setParameter(AndersenThermostat::Temperature(), temperature+10);
+    context.loadCheckpoint(checkpoint);
+    State restored = context.getState(stateTypes);
+    compareStates(atCheckpoint, restored);
+
+    // Repeat the same number of steps from the restored state and compare the end points.
+    integrator.step(10);
+    State replayed = context.getState(stateTypes);
+    compareStates(afterContinuing, replayed);
+}
+
+void runPlatformTests() { // CPU-specific test list; presumably invoked by the shared driver in TestCheckpoints.h -- confirm
+    testCheckpoint(); // the only platform-specific test defined in this file
+}
--- a/platforms/cpu/tests/TestCpuCustomGBForce.cpp
+++ b/platforms/cpu/tests/TestCpuCustomGBForce.cpp
--- a/platforms/cpu/tests/TestCpuCustomManyParticleForce.cpp
+++ b/platforms/cpu/tests/TestCpuCustomManyParticleForce.cpp
--- a/platforms/cpu/tests/TestCpuCustomNonbondedForce.cpp
+++ b/platforms/cpu/tests/TestCpuCustomNonbondedForce.cpp