Merge branch 'master' into nucleic

fd473eea · Peter Eastman · 0a751b5b · 6a985cfd · fd473eea · fd473eea
Commit fd473eea authored Oct 29, 2015 by Peter Eastman
20 changed files
--- a/platforms/cpu/tests/TestCpuEwald.cpp
+++ b/platforms/cpu/tests/TestCpuEwald.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2015 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -29,297 +29,8 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

-/**
- * This tests the Ewald summation method CPU implementation of NonbondedForce.
- */
+#include "CpuTests.h"
+#include "TestEwald.h"

-#include "openmm/internal/AssertionUtilities.h"
-#include "openmm/Context.h"
-#include "CpuPlatform.h"
-#include "ReferencePlatform.h"
-#include "openmm/NonbondedForce.h"
-#include "openmm/System.h"
-#include "openmm/LangevinIntegrator.h"
-#include "openmm/VerletIntegrator.h"
-#include "openmm/internal/ContextImpl.h"
-#include "SimTKOpenMMRealType.h"
-#include "sfmt/SFMT.h"
-#include <iostream>
-#include <vector>
-
-using namespace OpenMM;
-using namespace std;
-
-CpuPlatform platform;
-
-const double TOL = 1e-5;
-
-void testEwaldPME(bool includeExceptions) {
-
-//      Use amorphous NaCl system for the tests
-
-    const int numParticles = 894;
-    const double cutoff = 1.2;
-    const double boxSize = 3.00646;
-    double tol = 1e-5;
-
-    ReferencePlatform reference;
-    System system;
-    NonbondedForce* nonbonded = new NonbondedForce();
-    nonbonded->setNonbondedMethod(NonbondedForce::Ewald);
-    nonbonded->setCutoffDistance(cutoff);
-    nonbonded->setEwaldErrorTolerance(tol);
-
-    for (int i = 0; i < numParticles/2; i++)
-        system.addParticle(22.99);
-    for (int i = 0; i < numParticles/2; i++)
-        system.addParticle(35.45);
-    for (int i = 0; i < numParticles/2; i++)
-        nonbonded->addParticle(1.0, 1.0,0.0);
-    for (int i = 0; i < numParticles/2; i++)
-        nonbonded->addParticle(-1.0, 1.0,0.0);
-    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
-    system.addForce(nonbonded);
-
-    vector<Vec3> positions(numParticles);
-    #include "nacl_amorph.dat"
-    if (includeExceptions) {
-        // Add some exclusions.
-
-        for (int i = 0; i < numParticles-1; i++) {
-            Vec3 delta = positions[i]-positions[i+1];
-            if (sqrt(delta.dot(delta)) < 0.5*cutoff)
-                nonbonded->addException(i, i+1, i%2 == 0 ? 0.0 : 0.5, 1.0, 0.0);
-        }
-    }
-
-//    (1)  Check whether the Reference and CPU platforms agree when using Ewald Method
-
-    VerletIntegrator integrator1(0.01);
-    VerletIntegrator integrator2(0.01);
-    Context cpuContext(system, integrator1, platform);
-    Context referenceContext(system, integrator2, reference);
-    cpuContext.setPositions(positions);
-    referenceContext.setPositions(positions);
-    State cpuState = cpuContext.getState(State::Forces | State::Energy);
-    State referenceState = referenceContext.getState(State::Forces | State::Energy);
-    tol = 1e-2;
-    for (int i = 0; i < numParticles; i++) {
-        ASSERT_EQUAL_VEC(referenceState.getForces()[i], cpuState.getForces()[i], tol);
-    }
-    tol = 1e-5;
-    ASSERT_EQUAL_TOL(referenceState.getPotentialEnergy(), cpuState.getPotentialEnergy(), tol);
-
-//    (2) Check whether Ewald method in CPU is self-consistent
-
-    double norm = 0.0;
-    for (int i = 0; i < numParticles; ++i) {
-        Vec3 f = cpuState.getForces()[i];
-        norm += f[0]*f[0] + f[1]*f[1] + f[2]*f[2];
-    }
-
-    norm = std::sqrt(norm);
-    const double delta = 5e-3;
-    double step = delta/norm;
-    for (int i = 0; i < numParticles; ++i) {
-        Vec3 p = positions[i];
-        Vec3 f = cpuState.getForces()[i];
-        positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
-    }
-    VerletIntegrator integrator3(0.01);
-    Context cpuContext2(system, integrator3, platform);
-    cpuContext2.setPositions(positions);
-
-    tol = 1e-2;
-    State cpuState2 = cpuContext2.getState(State::Energy);
-    ASSERT_EQUAL_TOL(norm, (cpuState2.getPotentialEnergy()-cpuState.getPotentialEnergy())/delta, tol)
-
-//    (3)  Check whether the Reference and CPU platforms agree when using PME
-
-    nonbonded->setNonbondedMethod(NonbondedForce::PME);
-    cpuContext.reinitialize();
-    referenceContext.reinitialize();
-    cpuContext.setPositions(positions);
-    referenceContext.setPositions(positions);
-    cpuState = cpuContext.getState(State::Forces | State::Energy);
-    referenceState = referenceContext.getState(State::Forces | State::Energy);
-    tol = 1e-2;
-    for (int i = 0; i < numParticles; i++) {
-        ASSERT_EQUAL_VEC(referenceState.getForces()[i], cpuState.getForces()[i], tol);
-    }
-    tol = 1e-5;
-    ASSERT_EQUAL_TOL(referenceState.getPotentialEnergy(), cpuState.getPotentialEnergy(), tol);
-
-//    (4) Check whether PME method in CPU is self-consistent
-
-    norm = 0.0;
-    for (int i = 0; i < numParticles; ++i) {
-        Vec3 f = cpuState.getForces()[i];
-        norm += f[0]*f[0] + f[1]*f[1] + f[2]*f[2];
-    }
-
-    norm = std::sqrt(norm);
-    step = delta/norm;
-    for (int i = 0; i < numParticles; ++i) {
-        Vec3 p = positions[i];
-        Vec3 f = cpuState.getForces()[i];
-        positions[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
-    }
-    VerletIntegrator integrator4(0.01);
-    Context cpuContext3(system, integrator4, platform);
-    cpuContext3.setPositions(positions);
-
-    tol = 1e-2;
-    State cpuState3 = cpuContext3.getState(State::Energy);
-    ASSERT_EQUAL_TOL(norm, (cpuState3.getPotentialEnergy()-cpuState.getPotentialEnergy())/delta, tol)
-}
-
-void testEwald2Ions() {
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* nonbonded = new NonbondedForce();
-    nonbonded->addParticle(1.0, 1, 0);
-    nonbonded->addParticle(-1.0, 1, 0);
-    nonbonded->setNonbondedMethod(NonbondedForce::Ewald);
-    const double cutoff = 2.0;
-    nonbonded->setCutoffDistance(cutoff);
-    nonbonded->setEwaldErrorTolerance(TOL);
-    system.setDefaultPeriodicBoxVectors(Vec3(6, 0, 0), Vec3(0, 6, 0), Vec3(0, 0, 6));
-    system.addForce(nonbonded);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(2);
-    positions[0] = Vec3(3.048000,2.764000,3.156000);
-    positions[1] = Vec3(2.809000,2.888000,2.571000);
-    context.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-    const vector<Vec3>& forces = state.getForces();
-
-    ASSERT_EQUAL_VEC(Vec3(-123.711,  64.1877, -302.716), forces[0], 10*TOL);
-    ASSERT_EQUAL_VEC(Vec3( 123.711, -64.1877,  302.716), forces[1], 10*TOL);
-    ASSERT_EQUAL_TOL(-217.276, state.getPotentialEnergy(), 0.01/*10*TOL*/);
-}
-
-void testTriclinic() {
-    // Create a triclinic box containing eight particles.
-
-    System system;
-    system.setDefaultPeriodicBoxVectors(Vec3(2.5, 0, 0), Vec3(0.5, 3.0, 0), Vec3(0.7, 0.9, 3.5));
-    for (int i = 0; i < 8; i++)
-        system.addParticle(1.0);
-    NonbondedForce* force = new NonbondedForce();
-    system.addForce(force);
-    force->setNonbondedMethod(NonbondedForce::PME);
-    force->setCutoffDistance(1.0);
-    force->setPMEParameters(3.45891, 32, 40, 48);
-    for (int i = 0; i < 4; i++)
-        force->addParticle(-1, 0.440104, 0.4184); // Cl parameters
-    for (int i = 0; i < 4; i++)
-        force->addParticle(1, 0.332840, 0.0115897); // Na parameters
-    vector<Vec3> positions(8);
-    positions[0] = Vec3(1.744, 2.788, 3.162);
-    positions[1] = Vec3(1.048, 0.762, 2.340);
-    positions[2] = Vec3(2.489, 1.570, 2.817);
-    positions[3] = Vec3(1.027, 1.893, 3.271);
-    positions[4] = Vec3(0.937, 0.825, 0.009);
-    positions[5] = Vec3(2.290, 1.887, 3.352);
-    positions[6] = Vec3(1.266, 1.111, 2.894);
-    positions[7] = Vec3(0.933, 1.862, 3.490);
-
-    // Compute the forces and energy.
-
-    VerletIntegrator integ(0.001);
-    Context context(system, integ, platform);
-    context.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-
-    // Compare them to values computed by Gromacs.
-
-    double expectedEnergy = -963.370;
-    vector<Vec3> expectedForce(8);
-    expectedForce[0] = Vec3(4.25253e+01, -1.23503e+02, 1.22139e+02);
-    expectedForce[1] = Vec3(9.74752e+01, 1.68213e+02, 1.93169e+02);
-    expectedForce[2] = Vec3(-1.50348e+02, 1.29165e+02, 3.70435e+02);
-    expectedForce[3] = Vec3(9.18644e+02, -3.52571e+00, -1.34772e+03);
-    expectedForce[4] = Vec3(-1.61193e+02, 9.01528e+01, -7.12904e+01);
-    expectedForce[5] = Vec3(2.82630e+02, 2.78029e+01, -3.72864e+02);
-    expectedForce[6] = Vec3(-1.47454e+02, -2.14448e+02, -3.55789e+02);
-    expectedForce[7] = Vec3(-8.82195e+02, -7.39132e+01, 1.46202e+03);
-    for (int i = 0; i < 8; i++) {
-        ASSERT_EQUAL_VEC(expectedForce[i], state.getForces()[i], 1e-4);
-    }
-    ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-4);
-}
-
-void testErrorTolerance(NonbondedForce::NonbondedMethod method) {
-    // Create a cloud of random point charges.
-
-    const int numParticles = 51;
-    const double boxWidth = 5.0;
-    System system;
-    system.setDefaultPeriodicBoxVectors(Vec3(boxWidth, 0, 0), Vec3(0, boxWidth, 0), Vec3(0, 0, boxWidth));
-    NonbondedForce* force = new NonbondedForce();
-    system.addForce(force);
-    vector<Vec3> positions(numParticles);
-    OpenMM_SFMT::SFMT sfmt;
-    init_gen_rand(0, sfmt);
-
-    for (int i = 0; i < numParticles; i++) {
-        system.addParticle(1.0);
-        force->addParticle(-1.0+i*2.0/(numParticles-1), 1.0, 0.0);
-        positions[i] = Vec3(boxWidth*genrand_real2(sfmt), boxWidth*genrand_real2(sfmt), boxWidth*genrand_real2(sfmt));
-    }
-    force->setNonbondedMethod(method);
-
-    // For various values of the cutoff and error tolerance, see if the actual error is reasonable.
-
-    for (double cutoff = 1.0; cutoff < boxWidth/2; cutoff *= 1.2) {
-        force->setCutoffDistance(cutoff);
-        vector<Vec3> refForces;
-        double norm = 0.0;
-        for (double tol = 5e-5; tol < 1e-3; tol *= 2.0) {
-            force->setEwaldErrorTolerance(tol);
-            VerletIntegrator integrator(0.01);
-            Context context(system, integrator, platform);
-            context.setPositions(positions);
-            State state = context.getState(State::Forces);
-            if (refForces.size() == 0) {
-                refForces = state.getForces();
-                for (int i = 0; i < numParticles; i++)
-                    norm += refForces[i].dot(refForces[i]);
-                norm = sqrt(norm);
-            }
-            else {
-                double diff = 0.0;
-                for (int i = 0; i < numParticles; i++) {
-                    Vec3 delta = refForces[i]-state.getForces()[i];
-                    diff += delta.dot(delta);
-                }
-                diff = sqrt(diff)/norm;
-                ASSERT(diff < 2*tol);
-            }
-        }
-    }
-}
-
-int main(int argc, char* argv[]) {
-    try {
-        if (!CpuPlatform::isProcessorSupported()) {
-            cout << "CPU is not supported.  Exiting." << endl;
-            return 0;
-        }
-        testEwaldPME(false);
-        testEwaldPME(true);
-//        testEwald2Ions();
-        testTriclinic();
-        testErrorTolerance(NonbondedForce::Ewald);
-        testErrorTolerance(NonbondedForce::PME);
-    }
-    catch(const exception& e) {
-        cout << "exception: " << e.what() << endl;
-        return 1;
-    }
-    cout << "Done" << endl;
-    return 0;
+void runPlatformTests() { // Empty: this file defines no tests of its own. NOTE(review): presumably the shared tests and main() come from TestEwald.h - confirm.
 }
--- a/platforms/cpu/tests/TestCpuGBSAOBCForce.cpp
+++ b/platforms/cpu/tests/TestCpuGBSAOBCForce.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2014 Stanford University and the Authors.      *
+ * Portions copyright (c) 2015 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -29,244 +29,8 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

-/**
- * This tests the CPU implementation of GBSAOBCForce.
- */
+#include "CpuTests.h"
+#include "TestGBSAOBCForce.h"

-#include "openmm/internal/AssertionUtilities.h"
-#include "openmm/Context.h"
-#include "CpuPlatform.h"
-#include "openmm/GBSAOBCForce.h"
-#include "openmm/System.h"
-#include "openmm/LangevinIntegrator.h"
-#include "openmm/NonbondedForce.h"
-#include "SimTKOpenMMRealType.h"
-#include "sfmt/SFMT.h"
-#include <iostream>
-#include <vector>
-
-using namespace OpenMM;
-using namespace std;
-
-const double TOL = 1e-5;
-
-void testSingleParticle() {
-    CpuPlatform platform;
-    System system;
-    system.addParticle(2.0);
-    LangevinIntegrator integrator(0, 0.1, 0.01);
-    GBSAOBCForce* forceField = new GBSAOBCForce();
-    forceField->addParticle(0.5, 0.15, 1);
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(1);
-    positions[0] = Vec3(0, 0, 0);
-    context.setPositions(positions);
-    State state = context.getState(State::Energy);
-    double bornRadius = 0.15-0.009; // dielectric offset
-    double eps0 = EPSILON0;
-    double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/forceField->getSoluteDielectric()-1.0/forceField->getSolventDielectric())/bornRadius;
-    double extendedRadius = 0.15+0.14; // probe radius
-    double nonpolarEnergy = 4*PI_M*2.25936*extendedRadius*extendedRadius*std::pow(0.15/bornRadius, 6.0);
-    ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
-    
-    // Change the parameters and see if it is still correct.
-    
-    forceField->setParticleParameters(0, 0.4, 0.25, 1);
-    forceField->updateParametersInContext(context);
-    state = context.getState(State::Energy);
-    bornRadius = 0.25-0.009; // dielectric offset
-    bornEnergy = (-0.4*0.4/(8*PI_M*eps0))*(1.0/forceField->getSoluteDielectric()-1.0/forceField->getSolventDielectric())/bornRadius;
-    extendedRadius = 0.25+0.14;
-    nonpolarEnergy = 4*PI_M*2.25936*extendedRadius*extendedRadius*std::pow(0.25/bornRadius, 6.0);
-    ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
-}
-
-void testGlobalSettings() {
-    CpuPlatform platform;
-    System system;
-    system.addParticle(2.0);
-    LangevinIntegrator integrator(0, 0.1, 0.01);
-    GBSAOBCForce* forceField = new GBSAOBCForce();
-    forceField->addParticle(0.5, 0.15, 1);
-    const double soluteDielectric = 2.1;
-    const double solventDielectric = 35.0;
-    const double surfaceAreaEnergy = 0.75;
-    forceField->setSoluteDielectric(soluteDielectric);
-    forceField->setSolventDielectric(solventDielectric);
-    forceField->setSurfaceAreaEnergy(surfaceAreaEnergy);
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(1);
-    positions[0] = Vec3(0, 0, 0);
-    context.setPositions(positions);
-    State state = context.getState(State::Energy);
-    double bornRadius = 0.15-0.009; // dielectric offset
-    double eps0 = EPSILON0;
-    double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/soluteDielectric-1.0/solventDielectric)/bornRadius;
-    double extendedRadius = 0.15+0.14; // probe radius
-    double nonpolarEnergy = 4*PI_M*surfaceAreaEnergy*extendedRadius*extendedRadius*std::pow(0.15/bornRadius, 6.0);
-    ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
-}
-
-void testCutoffAndPeriodic() {
-    CpuPlatform platform;
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    LangevinIntegrator integrator(0, 0.1, 0.01);
-    GBSAOBCForce* gbsa = new GBSAOBCForce();
-    NonbondedForce* nonbonded = new NonbondedForce();
-    gbsa->addParticle(-1, 0.15, 1);
-    nonbonded->addParticle(-1, 1, 0);
-    gbsa->addParticle(1, 0.15, 1);
-    nonbonded->addParticle(1, 1, 0);
-    const double cutoffDistance = 3.0;
-    const double boxSize = 10.0;
-    nonbonded->setCutoffDistance(cutoffDistance);
-    gbsa->setCutoffDistance(cutoffDistance);
-    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
-    system.addForce(gbsa);
-    system.addForce(nonbonded);
-    vector<Vec3> positions(2);
-    positions[0] = Vec3(0, 0, 0);
-    positions[1] = Vec3(2, 0, 0);
-
-    // Calculate the forces for both cutoff and periodic with two different atom positions.
-
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
-    gbsa->setNonbondedMethod(GBSAOBCForce::CutoffNonPeriodic);
-    Context context(system, integrator, platform);
-    context.setPositions(positions);
-    State state1 = context.getState(State::Forces);
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
-    gbsa->setNonbondedMethod(GBSAOBCForce::CutoffPeriodic);
-    context.reinitialize();
-    context.setPositions(positions);
-    State state2 = context.getState(State::Forces);
-    positions[1][0]+= boxSize;
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
-    gbsa->setNonbondedMethod(GBSAOBCForce::CutoffNonPeriodic);
-    context.reinitialize();
-    context.setPositions(positions);
-    State state3 = context.getState(State::Forces);
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
-    gbsa->setNonbondedMethod(GBSAOBCForce::CutoffPeriodic);
-    context.reinitialize();
-    context.setPositions(positions);
-    State state4 = context.getState(State::Forces);
-
-    // All forces should be identical, exception state3 which should be zero.
-
-    ASSERT_EQUAL_VEC(state1.getForces()[0], state2.getForces()[0], 0.01);
-    ASSERT_EQUAL_VEC(state1.getForces()[1], state2.getForces()[1], 0.01);
-    ASSERT_EQUAL_VEC(state1.getForces()[0], state4.getForces()[0], 0.01);
-    ASSERT_EQUAL_VEC(state1.getForces()[1], state4.getForces()[1], 0.01);
-    ASSERT_EQUAL_VEC(state3.getForces()[0], Vec3(0, 0, 0), 0.01);
-    ASSERT_EQUAL_VEC(state3.getForces()[1], Vec3(0, 0, 0), 0.01);
-}
-
-void testForce(int numParticles, NonbondedForce::NonbondedMethod method, GBSAOBCForce::NonbondedMethod method2) {
-    CpuPlatform platform;
-    ReferencePlatform reference;
-    System system;
-    GBSAOBCForce* gbsa = new GBSAOBCForce();
-    NonbondedForce* nonbonded = new NonbondedForce();
-    for (int i = 0; i < numParticles; ++i) {
-        system.addParticle(1.0);
-        double charge = i%2 == 0 ? -1 : 1;
-        gbsa->addParticle(charge, 0.15, 1);
-        nonbonded->addParticle(charge, 1, 0);
-    }
-    nonbonded->setNonbondedMethod(method);
-    gbsa->setNonbondedMethod(method2);
-    nonbonded->setCutoffDistance(3.0);
-    gbsa->setCutoffDistance(3.0);
-    int grid = (int) floor(0.5+pow(numParticles, 1.0/3.0));
-    if (method == NonbondedForce::CutoffPeriodic) {
-        double boxSize = (grid+1)*1.1;
-        system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
-    }
-    system.addForce(gbsa);
-    system.addForce(nonbonded);
-    LangevinIntegrator integrator1(0, 0.1, 0.01);
-    LangevinIntegrator integrator2(0, 0.1, 0.01);
-    Context context(system, integrator1, platform);
-    Context refContext(system, integrator2, reference);
-
-    // Set random (but uniformly distributed) positions for all the particles.
-
-    vector<Vec3> positions(numParticles);
-    OpenMM_SFMT::SFMT sfmt;
-    init_gen_rand(0, sfmt);
-
-    for (int i = 0; i < grid; i++)
-        for (int j = 0; j < grid; j++)
-            for (int k = 0; k < grid; k++)
-                positions[i*grid*grid+j*grid+k] = Vec3(i*1.1, j*1.1, k*1.1);
-    for (int i = 0; i < numParticles; ++i)
-        positions[i] = positions[i] + Vec3(0.5*genrand_real2(sfmt), 0.5*genrand_real2(sfmt), 0.5*genrand_real2(sfmt));
-    context.setPositions(positions);
-    refContext.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-    State refState = refContext.getState(State::Forces | State::Energy);
-
-    // Make sure the CPU and Reference platforms agree.
-
-    double norm = 0.0;
-    double diff = 0.0;
-    for (int i = 0; i < numParticles; ++i) {
-        Vec3 f = state.getForces()[i];
-        norm += f[0]*f[0] + f[1]*f[1] + f[2]*f[2];
-        Vec3 delta = f-refState.getForces()[i];
-        diff += delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
-    }
-    norm = std::sqrt(norm);
-    diff = std::sqrt(diff);
-    ASSERT_EQUAL_TOL(0.0, diff, 0.001*norm);
-    ASSERT_EQUAL_TOL(state.getPotentialEnergy(), refState.getPotentialEnergy(), 1e-3);
-
-    // Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
-    // (This doesn't work with cutoffs, since the energy changes discontinuously at the cutoff distance.)
-
-    if (method == NonbondedForce::NoCutoff)
-    {
-        const double delta = 0.3;
-        double step = 0.5*delta/norm;
-        vector<Vec3> positions2(numParticles), positions3(numParticles);
-        for (int i = 0; i < numParticles; ++i) {
-            Vec3 p = positions[i];
-            Vec3 f = state.getForces()[i];
-            positions2[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
-            positions3[i] = Vec3(p[0]+f[0]*step, p[1]+f[1]*step, p[2]+f[2]*step);
-        }
-        context.setPositions(positions2);
-        State state2 = context.getState(State::Energy);
-        context.setPositions(positions3);
-        State state3 = context.getState(State::Energy);
-        ASSERT_EQUAL_TOL(norm, (state2.getPotentialEnergy()-state3.getPotentialEnergy())/delta, 1e-2)
-    }
-}
-
-int main() {
-    try {
-        if (!CpuPlatform::isProcessorSupported()) {
-            cout << "CPU is not supported.  Exiting." << endl;
-            return 0;
-        }
-        testSingleParticle();
-        testGlobalSettings();
-        testCutoffAndPeriodic();
-        for (int i = 5; i < 11; i++) {
-            testForce(i*i*i, NonbondedForce::NoCutoff, GBSAOBCForce::NoCutoff);
-            testForce(i*i*i, NonbondedForce::CutoffNonPeriodic, GBSAOBCForce::CutoffNonPeriodic);
-            testForce(i*i*i, NonbondedForce::CutoffPeriodic, GBSAOBCForce::CutoffPeriodic);
-        }
-    }
-    catch(const exception& e) {
-        cout << "exception: " << e.what() << endl;
-        return 1;
-    }
-    cout << "Done" << endl;
-    return 0;
+void runPlatformTests() { // Empty: this file defines no tests of its own. NOTE(review): presumably the shared tests and main() come from TestGBSAOBCForce.h - confirm.
 }
--- a/platforms/cpu/tests/TestCpuHarmonicAngleForce.cpp
+++ b/platforms/cpu/tests/TestCpuHarmonicAngleForce.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2008-2015 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "CpuTests.h"
+#include "TestHarmonicAngleForce.h"
+
+void testParallelComputation() { // Compares HarmonicAngleForce energy and forces computed on `platform` against the Reference platform.
+    System system;
+    const int numParticles = 200;
+    for (int i = 0; i < numParticles; i++)
+        system.addParticle(1.0); // every particle is added with the value 1.0 (mass)
+    HarmonicAngleForce* force = new HarmonicAngleForce();
+    for (int i = 2; i < numParticles; i++) // one angle term per consecutive triple (i-2, i-1, i)
+        force->addAngle(i-2, i-1, i, 1.1, i); // last two arguments: 1.1 and i; presumably equilibrium angle and force constant - see the OpenMM addAngle docs
+    system.addForce(force);
+    vector<Vec3> positions(numParticles);
+    for (int i = 0; i < numParticles; i++)
+        positions[i] = Vec3(i, i%2, 0); // zig-zag chain: x = i, y alternates 0/1, z = 0
+    VerletIntegrator integrator1(0.01);
+    ReferencePlatform reference;
+    Context context1(system, integrator1, reference); // baseline evaluation on the Reference platform
+    context1.setPositions(positions);
+    State state1 = context1.getState(State::Forces | State::Energy);
+    VerletIntegrator integrator2(0.01); // a second integrator instance is created for the second Context
+    Context context2(system, integrator2, platform); // `platform` is not declared in this block; presumably supplied by CpuTests.h - verify
+    context2.setPositions(positions);
+    State state2 = context2.getState(State::Forces | State::Energy);
+    ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-5); // potential energies must agree within tolerance 1e-5
+    for (int i = 0; i < numParticles; i++)
+        ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5); // per-particle force vectors must agree within tolerance 1e-5
+}
+
+void runPlatformTests() { // Runs the one test defined in this file. NOTE(review): the caller is not visible here; presumably the shared driver in TestHarmonicAngleForce.h - confirm.
+    testParallelComputation();
+}
--- a/platforms/cpu/tests/TestCpuLangevinIntegrator.cpp
+++ b/platforms/cpu/tests/TestCpuLangevinIntegrator.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2015 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -29,255 +29,8 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

-/**
- * This tests the CPU implementation of LangevinIntegrator.
- */
+#include "CpuTests.h"
+#include "TestLangevinIntegrator.h"

-#include "openmm/internal/AssertionUtilities.h"
-#include "openmm/Context.h"
-#include "CpuPlatform.h"
-#include "openmm/HarmonicBondForce.h"
-#include "openmm/NonbondedForce.h"
-#include "openmm/System.h"
-#include "openmm/LangevinIntegrator.h"
-#include "SimTKOpenMMRealType.h"
-#include "sfmt/SFMT.h"
-#include <iostream>
-#include <vector>
-
-using namespace OpenMM;
-using namespace std;
-
-const double TOL = 1e-5;  // file-level tolerance; the test functions visible below pass literal tolerances instead of this constant
-
-void testSingleBond() {  // Integrates two bonded particles, compares positions and velocities with an analytical damped-oscillator solution, then checks energy conservation at tiny friction.
-    CpuPlatform platform;
-    System system;
-    system.addParticle(2.0);
-    system.addParticle(2.0);
-    LangevinIntegrator integrator(0, 0.1, 0.01);  // first argument is the temperature (0 here); NOTE(review): presumably followed by friction and step size - confirm
-    HarmonicBondForce* forceField = new HarmonicBondForce();
-    forceField->addBond(0, 1, 1.5, 1);  // the expected separation below relaxes toward 1.5, matching the third argument
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(2);
-    positions[0] = Vec3(-1, 0, 0);
-    positions[1] = Vec3(1, 0, 0);
-    context.setPositions(positions);
-    
-    // This is simply a damped harmonic oscillator, so compare it to the analytical solution.
-    
-    double freq = std::sqrt(1-0.05*0.05);  // oscillation frequency used by the expected-value formulas, paired with decay rate 0.05
-    for (int i = 0; i < 1000; ++i) {
-        State state = context.getState(State::Positions | State::Velocities);
-        double time = state.getTime();
-        double expectedDist = 1.5+0.5*std::exp(-0.05*time)*std::cos(freq*time);  // equals 2 at time 0, the initial separation
-        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
-        ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
-        double expectedSpeed = -0.5*std::exp(-0.05*time)*(0.05*std::cos(freq*time)+freq*std::sin(freq*time));  // time derivative of expectedDist
-        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.02);
-        ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.02);
-        integrator.step(1);
-    }
-    
-    // Now set the friction to a tiny value and see if it conserves energy.
-    
-    integrator.setFriction(5e-5);
-    context.setPositions(positions);  // positions are reset to the starting geometry; velocities are not reset here
-    State state = context.getState(State::Energy);
-    double initialEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
-    for (int i = 0; i < 1000; ++i) {
-        state = context.getState(State::Energy);
-        double energy = state.getKineticEnergy()+state.getPotentialEnergy();
-        ASSERT_EQUAL_TOL(initialEnergy, energy, 0.01);  // total energy is checked before every step
-        integrator.step(1);
-    }
-}
-
-void testTemperature() {  // Runs 8 charged particles under a LangevinIntegrator and checks that the time-averaged kinetic energy matches 0.5*N*3*BOLTZ*temp.
-    const int numParticles = 8;
-    const double temp = 100.0;
-    CpuPlatform platform;
-    System system;
-    LangevinIntegrator integrator(temp, 2.0, 0.01);
-    NonbondedForce* forceField = new NonbondedForce();
-    for (int i = 0; i < numParticles; ++i) {
-        system.addParticle(2.0);
-        forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);  // first argument alternates between +1 and -1 by particle index
-    }
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(numParticles);
-    for (int i = 0; i < numParticles; ++i)
-        positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));  // the 8 sign combinations of (+-2, +-2, +-2), i.e. cube corners
-    context.setPositions(positions);
-    
-    // Let it equilibrate.
-    
-    integrator.step(10000);
-    
-    // Now run it for a while and see if the temperature is correct.
-    
-    double ke = 0.0;
-    for (int i = 0; i < 10000; ++i) {
-        State state = context.getState(State::Energy);
-        ke += state.getKineticEnergy();  // accumulate one sample per step
-        integrator.step(1);
-    }
-    ke /= 10000;  // mean kinetic energy over the 10000 samples
-    double expected = 0.5*numParticles*3*BOLTZ*temp;
-    ASSERT_USUALLY_EQUAL_TOL(expected, ke, 6/std::sqrt(10000.0));  // tolerance is 6/sqrt(sample count) = 0.06
-}
-
-void testConstraints() {  // Simulates a chain of 8 particles joined by 7 unit-length constraints and checks the constrained distances at every step.
-    const int numParticles = 8;
-    const double temp = 100.0;
-    CpuPlatform platform;
-    System system;
-    LangevinIntegrator integrator(temp, 2.0, 0.01);
-    integrator.setConstraintTolerance(1e-5);
-    NonbondedForce* forceField = new NonbondedForce();
-    for (int i = 0; i < numParticles; ++i) {
-        system.addParticle(10.0);
-        forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
-    }
-    for (int i = 0; i < numParticles-1; ++i)
-        system.addConstraint(i, i+1, 1.0);  // constrain each consecutive pair to distance 1.0
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(numParticles);
-    vector<Vec3> velocities(numParticles);
-    OpenMM_SFMT::SFMT sfmt;
-    init_gen_rand(0, sfmt);  // fixed seed 0
-
-    for (int i = 0; i < numParticles; ++i) {
-        positions[i] = Vec3(i/2, (i+1)/2, 0);  // integer division yields a staircase in which consecutive particles are exactly 1 apart
-        velocities[i] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
-    }
-    context.setPositions(positions);
-    context.setVelocities(velocities);
-    
-    // Simulate it and see whether the constraints remain satisfied.
-    
-    for (int i = 0; i < 1000; ++i) {
-        State state = context.getState(State::Positions);
-        for (int j = 0; j < numParticles-1; ++j) {
-            Vec3 p1 = state.getPositions()[j];
-            Vec3 p2 = state.getPositions()[j+1];
-            double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));  // Euclidean distance between neighbours
-            ASSERT_EQUAL_TOL(1.0, dist, 2e-5);  // checked with twice the constraint tolerance set above
-        }
-        integrator.step(1);
-    }
-}
-
-void testConstrainedMasslessParticles() {  // Checks that constraining a zero-mass particle to a massive one is rejected, while constraining two zero-mass particles is accepted.
-    CpuPlatform platform;
-    System system;
-    system.addParticle(0.0);
-    system.addParticle(1.0);
-    system.addConstraint(0, 1, 1.5);  // constraint between the zero-mass particle 0 and the mass-1 particle 1
-    vector<Vec3> positions(2);
-    positions[0] = Vec3(-1, 0, 0);
-    positions[1] = Vec3(1, 0, 0);
-    LangevinIntegrator integrator(300.0, 2.0, 0.01);
-    bool failed = false;
-    try {
-        // This should throw an exception.
-        
-        Context context(system, integrator, platform);
-    }
-    catch (exception& ex) {
-        failed = true;  // any exception type derived from exception counts as the expected failure
-    }
-    ASSERT(failed);
-    
-    // Now make both particles massless, which should work.
-    
-    system.setParticleMass(1, 0.0);
-    Context context(system, integrator, platform);
-    context.setPositions(positions);
-    context.setVelocitiesToTemperature(300.0);
-    integrator.step(1);
-    State state = context.getState(State::Velocities | State::Positions);
-    ASSERT_EQUAL(0.0, state.getVelocities()[0][0]);  // only the x velocity of particle 0 is checked, and it must be exactly zero
-}
-
-void testRandomSeed() {  // Checks that equal random seeds reproduce the same positions after 10 steps and that different seeds give different positions.
-    const int numParticles = 8;
-    const double temp = 100.0;
-    const double collisionFreq = 10.0;  // NOTE(review): never referenced in this function
-    CpuPlatform platform;
-    System system;
-    LangevinIntegrator integrator(temp, 2.0, 0.01);
-    NonbondedForce* forceField = new NonbondedForce();
-    for (int i = 0; i < numParticles; ++i) {
-        system.addParticle(2.0);
-        forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
-    }
-    system.addForce(forceField);
-    vector<Vec3> positions(numParticles);
-    vector<Vec3> velocities(numParticles);
-    for (int i = 0; i < numParticles; ++i) {
-        positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));  // the 8 sign combinations of (+-2, +-2, +-2)
-        velocities[i] = Vec3(0, 0, 0);
-    }
-
-    // Try twice with the same random seed.
-
-    integrator.setRandomNumberSeed(5);
-    Context context(system, integrator, platform);
-    context.setPositions(positions);
-    context.setVelocities(velocities);
-    integrator.step(10);
-    State state1 = context.getState(State::Positions);
-    context.reinitialize();  // each run starts from a reinitialized context with the same positions and zero velocities
-    context.setPositions(positions);
-    context.setVelocities(velocities);
-    integrator.step(10);
-    State state2 = context.getState(State::Positions);
-
-    // Try twice with a different random seed.
-
-    integrator.setRandomNumberSeed(10);
-    context.reinitialize();
-    context.setPositions(positions);
-    context.setVelocities(velocities);
-    integrator.step(10);
-    State state3 = context.getState(State::Positions);
-    context.reinitialize();
-    context.setPositions(positions);
-    context.setVelocities(velocities);
-    integrator.step(10);
-    State state4 = context.getState(State::Positions);
-
-    // Compare the results.
-
-    for (int i = 0; i < numParticles; i++) {
-        for (int j = 0; j < 3; j++) {
-            ASSERT_EQUAL_TOL(state1.getPositions()[i][j], state2.getPositions()[i][j], 1e-5);  // seed 5 run twice
-            ASSERT_EQUAL_TOL(state3.getPositions()[i][j], state4.getPositions()[i][j], 1e-5);  // seed 10 run twice
-            ASSERT(state1.getPositions()[i][j] != state3.getPositions()[i][j]);  // every coordinate must differ between the two seeds
-        }
-    }
-}
-
-int main() {  // Runs the five LangevinIntegrator tests; returns 0 on success or on an unsupported CPU, 1 if any test throws.
-    try {
-        if (!CpuPlatform::isProcessorSupported()) {
-            cout << "CPU is not supported.  Exiting." << endl;
-            return 0;  // an unsupported processor is reported but exits with status 0
-        }
-        testSingleBond();
-        testTemperature();
-        testConstraints();
-        testConstrainedMasslessParticles();
-        testRandomSeed();
-    }
-    catch(const exception& e) {
-        cout << "exception: " << e.what() << endl;
-        return 1;
-    }
-    cout << "Done" << endl;
-    return 0;
+void runPlatformTests() {  // intentionally empty: this file defines no additional tests of its own
 }
--- a/platforms/cpu/tests/TestCpuNonbondedForce.cpp
+++ b/platforms/cpu/tests/TestCpuNonbondedForce.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2015 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -29,684 +29,8 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

-/**
- * This tests all the different force terms in the CPU implementation of NonbondedForce.
- */
+#include "CpuTests.h"
+#include "TestNonbondedForce.h"

-#include "openmm/internal/AssertionUtilities.h"
-#include "openmm/Context.h"
-#include "CpuPlatform.h"
-#include "ReferencePlatform.h"
-#include "openmm/HarmonicBondForce.h"
-#include "openmm/NonbondedForce.h"
-#include "openmm/System.h"
-#include "openmm/LangevinIntegrator.h"
-#include "openmm/VerletIntegrator.h"
-#include "openmm/internal/ContextImpl.h"
-#include "SimTKOpenMMRealType.h"
-#include "sfmt/SFMT.h"
-#include <iostream>
-#include <vector>
-
-using namespace OpenMM;
-using namespace std;
-
-CpuPlatform platform;  // single platform instance shared by the test functions in this file
-
-const double TOL = 1e-5;  // tolerance passed to most force and energy assertions below
-
-void testCoulomb() {  // Checks the force and energy between two charged particles with no Lennard-Jones contribution against closed-form Coulomb values.
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* forceField = new NonbondedForce();
-    forceField->addParticle(0.5, 1, 0);  // NOTE(review): arguments are presumably (charge, sigma, epsilon), which is consistent with the expected values below - confirm
-    forceField->addParticle(-1.5, 1, 0);
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(2);
-    positions[0] = Vec3(0, 0, 0);
-    positions[1] = Vec3(2, 0, 0);  // separation of 2 along x
-    context.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-    const vector<Vec3>& forces = state.getForces();
-    double force = ONE_4PI_EPS0*(-0.75)/4.0;  // -0.75 = 0.5 * -1.5, divided by the squared separation
-    ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
-    ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
-    ASSERT_EQUAL_TOL(ONE_4PI_EPS0*(-0.75)/2.0, state.getPotentialEnergy(), TOL);  // same product divided by the separation
-}
-
-void testLJ() {  // Checks the force and energy between two uncharged particles against the closed-form 12-6 Lennard-Jones expression.
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* forceField = new NonbondedForce();
-    forceField->addParticle(0, 1.2, 1);
-    forceField->addParticle(0, 1.4, 2);
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(2);
-    positions[0] = Vec3(0, 0, 0);
-    positions[1] = Vec3(2, 0, 0);
-    context.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-    const vector<Vec3>& forces = state.getForces();
-    double x = 1.3/2.0;  // 1.3 is the mean of the second arguments (1.2, 1.4); 2.0 is the separation
-    double eps = SQRT_TWO;  // NOTE(review): presumably sqrt(1*2), the geometric mean of the third arguments - confirm
-    double force = 4.0*eps*(12*std::pow(x, 12.0)-6*std::pow(x, 6.0))/2.0;  // magnitude of the 12-6 force at separation 2
-    ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
-    ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], TOL);
-    ASSERT_EQUAL_TOL(4.0*eps*(std::pow(x, 12.0)-std::pow(x, 6.0)), state.getPotentialEnergy(), TOL);
-}
-
-void testExclusionsAnd14() {  // For a 5-particle bonded chain, checks that pairs 0-1 and 0-2 give zero interaction, 0-3 gives a scaled one, and 0-4 gives the full one.
-    System system;
-    NonbondedForce* nonbonded = new NonbondedForce();
-    for (int i = 0; i < 5; ++i) {
-        system.addParticle(1.0);
-        nonbonded->addParticle(0, 1.5, 0);
-    }
-    vector<pair<int, int> > bonds;
-    bonds.push_back(pair<int, int>(0, 1));
-    bonds.push_back(pair<int, int>(1, 2));
-    bonds.push_back(pair<int, int>(2, 3));
-    bonds.push_back(pair<int, int>(3, 4));
-    nonbonded->createExceptionsFromBonds(bonds, 0.0, 0.0);  // NOTE(review): the two 0.0 arguments are presumably the 1-4 scale factors - confirm
-    int first14, second14;  // NOTE(review): left uninitialized; the loop below is relied on to find both the 0-3 and 1-4 exceptions
-    for (int i = 0; i < nonbonded->getNumExceptions(); i++) {
-        int particle1, particle2;
-        double chargeProd, sigma, epsilon;
-        nonbonded->getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
-        if ((particle1 == 0 && particle2 == 3) || (particle1 == 3 && particle2 == 0))
-            first14 = i;  // index of the exception for pair 0-3
-        if ((particle1 == 1 && particle2 == 4) || (particle1 == 4 && particle2 == 1))
-            second14 = i;  // index of the exception for pair 1-4
-    }
-    system.addForce(nonbonded);
-    VerletIntegrator integrator(0.01);
-    Context context(system, integrator, platform);
-    for (int i = 1; i < 5; ++i) {  // each iteration makes particle 0 and particle i the only interacting pair
-
-        // Test LJ forces
-
-        vector<Vec3> positions(5);
-        const double r = 1.0;
-        for (int j = 0; j < 5; ++j) {
-            nonbonded->setParticleParameters(j, 0, 1.5, 0);  // reset every particle to zeros in the second and fourth arguments
-            positions[j] = Vec3(0, j, 0);
-        }
-        nonbonded->setParticleParameters(0, 0, 1.5, 1);
-        nonbonded->setParticleParameters(i, 0, 1.5, 1);
-        nonbonded->setExceptionParameters(first14, 0, 3, 0, 1.5, i == 3 ? 0.5 : 0.0);  // the 0-3 exception gets 0.5 as its last argument only when pair 0-3 is under test
-        nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0.0);
-        positions[i] = Vec3(r, 0, 0);  // move particle i to distance r from particle 0 along x
-        context.reinitialize();
-        context.setPositions(positions);
-        State state = context.getState(State::Forces | State::Energy);
-        const vector<Vec3>& forces = state.getForces();
-        double x = 1.5/r;
-        double eps = 1.0;
-        double force = 4.0*eps*(12*std::pow(x, 12.0)-6*std::pow(x, 6.0))/r;
-        double energy = 4.0*eps*(std::pow(x, 12.0)-std::pow(x, 6.0));
-        if (i == 3) {
-            force *= 0.5;  // the 0-3 pair is expected at half strength
-            energy *= 0.5;
-        }
-        if (i < 3) {
-            force = 0;  // pairs 0-1 and 0-2 are expected to contribute nothing
-            energy = 0;
-        }
-        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
-        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[i], TOL);
-        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
-
-        // Test Coulomb forces
-
-        nonbonded->setParticleParameters(0, 2, 1.5, 0);
-        nonbonded->setParticleParameters(i, 2, 1.5, 0);
-        nonbonded->setExceptionParameters(first14, 0, 3, i == 3 ? 4/1.2 : 0, 1.5, 0);  // 4 = 2*2, reduced by a factor of 1.2 for the 0-3 pair
-        nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0);
-        context.reinitialize();
-        context.setPositions(positions);
-        state = context.getState(State::Forces | State::Energy);
-        const vector<Vec3>& forces2 = state.getForces();
-        force = ONE_4PI_EPS0*4/(r*r);
-        energy = ONE_4PI_EPS0*4/r;
-        if (i == 3) {
-            force /= 1.2;
-            energy /= 1.2;
-        }
-        if (i < 3) {
-            force = 0;
-            energy = 0;
-        }
-        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces2[0], TOL);
-        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces2[i], TOL);
-        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
-    }
-}
-
-void testCutoff() {  // Checks forces and energy of three charged particles with a non-periodic cutoff against expected values built from the krf and crf terms.
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* forceField = new NonbondedForce();
-    forceField->addParticle(1.0, 1, 0);
-    forceField->addParticle(1.0, 1, 0);
-    forceField->addParticle(1.0, 1, 0);
-    forceField->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
-    const double cutoff = 2.9;
-    forceField->setCutoffDistance(cutoff);
-    const double eps = 50.0;
-    forceField->setReactionFieldDielectric(eps);
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(3);
-    positions[0] = Vec3(0, 0, 0);
-    positions[1] = Vec3(0, 2, 0);
-    positions[2] = Vec3(0, 3, 0);  // pair 0-2 is 3 apart, beyond the 2.9 cutoff; pairs 0-1 (2 apart) and 1-2 (1 apart) are inside it
-    context.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-    const vector<Vec3>& forces = state.getForces();
-    const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
-    const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
-    const double force1 = ONE_4PI_EPS0*(1.0)*(0.25-2.0*krf*2.0);  // pair 0-1 at distance 2: 1/r^2 - 2*krf*r
-    const double force2 = ONE_4PI_EPS0*(1.0)*(1.0-2.0*krf*1.0);  // pair 1-2 at distance 1
-    ASSERT_EQUAL_VEC(Vec3(0, -force1, 0), forces[0], TOL);
-    ASSERT_EQUAL_VEC(Vec3(0, force1-force2, 0), forces[1], TOL);
-    ASSERT_EQUAL_VEC(Vec3(0, force2, 0), forces[2], TOL);
-    const double energy1 = ONE_4PI_EPS0*(1.0)*(0.5+krf*4.0-crf);  // pair 0-1: 1/r + krf*r^2 - crf
-    const double energy2 = ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf);  // pair 1-2
-    ASSERT_EQUAL_TOL(energy1+energy2, state.getPotentialEnergy(), TOL);
-}
-
-void testCutoff14() {  // Repeats the exclusion and 1-4 checks for a 5-particle chain on the x axis with a non-periodic cutoff of 3.5.
-    System system;
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* nonbonded = new NonbondedForce();
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
-    for (int i = 0; i < 5; ++i) {
-        system.addParticle(1.0);
-        nonbonded->addParticle(0, 1.5, 0);
-    }
-    const double cutoff = 3.5;
-    nonbonded->setCutoffDistance(cutoff);
-    const double eps = 30.0;
-    nonbonded->setReactionFieldDielectric(eps);
-    vector<pair<int, int> > bonds;
-    bonds.push_back(pair<int, int>(0, 1));
-    bonds.push_back(pair<int, int>(1, 2));
-    bonds.push_back(pair<int, int>(2, 3));
-    bonds.push_back(pair<int, int>(3, 4));
-    nonbonded->createExceptionsFromBonds(bonds, 0.0, 0.0);
-    int first14, second14;  // NOTE(review): left uninitialized; the loop below is relied on to find both the 0-3 and 1-4 exceptions
-    for (int i = 0; i < nonbonded->getNumExceptions(); i++) {
-        int particle1, particle2;
-        double chargeProd, sigma, epsilon;
-        nonbonded->getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
-        if ((particle1 == 0 && particle2 == 3) || (particle1 == 3 && particle2 == 0))
-            first14 = i;
-        if ((particle1 == 1 && particle2 == 4) || (particle1 == 4 && particle2 == 1))
-            second14 = i;
-    }
-    system.addForce(nonbonded);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(5);
-    positions[0] = Vec3(0, 0, 0);
-    positions[1] = Vec3(1, 0, 0);
-    positions[2] = Vec3(2, 0, 0);
-    positions[3] = Vec3(3, 0, 0);
-    positions[4] = Vec3(4, 0, 0);  // particle 4 is 4 away from particle 0, beyond the 3.5 cutoff
-    for (int i = 1; i < 5; ++i) {  // each iteration makes particle 0 and particle i the only interacting pair
-
-        // Test LJ forces
-
-        nonbonded->setParticleParameters(0, 0, 1.5, 1);
-        for (int j = 1; j < 5; ++j)
-            nonbonded->setParticleParameters(j, 0, 1.5, 0);
-        nonbonded->setParticleParameters(i, 0, 1.5, 1);
-        nonbonded->setExceptionParameters(first14, 0, 3, 0, 1.5, i == 3 ? 0.5 : 0.0);
-        nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0.0);
-        context.reinitialize();
-        context.setPositions(positions);
-        State state = context.getState(State::Forces | State::Energy);
-        const vector<Vec3>& forces = state.getForces();
-        double r = positions[i][0];  // particle 0 sits at the origin, so the x coordinate of particle i is the pair distance
-        double x = 1.5/r;
-        double e = 1.0;
-        double force = 4.0*e*(12*std::pow(x, 12.0)-6*std::pow(x, 6.0))/r;
-        double energy = 4.0*e*(std::pow(x, 12.0)-std::pow(x, 6.0));
-        if (i == 3) {
-            force *= 0.5;  // the 0-3 pair is expected at half strength
-            energy *= 0.5;
-        }
-        if (i < 3 || r > cutoff) {
-            force = 0;  // pairs 0-1 and 0-2, and any pair beyond the cutoff, are expected to contribute nothing
-            energy = 0;
-        }
-        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], TOL);
-        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[i], TOL);
-        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
-
-        // Test Coulomb forces
-
-        const double q = 0.7;
-        nonbonded->setParticleParameters(0, q, 1.5, 0);
-        nonbonded->setParticleParameters(i, q, 1.5, 0);
-        nonbonded->setExceptionParameters(first14, 0, 3, i == 3 ? q*q/1.2 : 0, 1.5, 0);  // q*q reduced by a factor of 1.2 for the 0-3 pair
-        nonbonded->setExceptionParameters(second14, 1, 4, 0, 1.5, 0);
-        context.reinitialize();
-        context.setPositions(positions);
-        state = context.getState(State::Forces | State::Energy);
-        const vector<Vec3>& forces2 = state.getForces();
-        force = ONE_4PI_EPS0*q*q/(r*r);  // plain Coulomb form; the expected values here contain no krf or crf terms
-        energy = ONE_4PI_EPS0*q*q/r;
-        if (i == 3) {
-            force /= 1.2;
-            energy /= 1.2;
-        }
-        if (i < 3 || r > cutoff) {
-            force = 0;
-            energy = 0;
-        }
-        ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces2[0], TOL);
-        ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces2[i], TOL);
-        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
-    }
-}
-
-void testPeriodic() {  // Checks forces and energy of three charged particles in a cubic periodic box of side 4 with a cutoff of 2.0.
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* nonbonded = new NonbondedForce();
-    nonbonded->addParticle(1.0, 1, 0);
-    nonbonded->addParticle(1.0, 1, 0);
-    nonbonded->addParticle(1.0, 1, 0);
-    nonbonded->addException(0, 1, 0.0, 1.0, 0.0);  // exception for pair 0-1 with zeros in the third and fifth arguments
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
-    const double cutoff = 2.0;
-    nonbonded->setCutoffDistance(cutoff);
-    system.setDefaultPeriodicBoxVectors(Vec3(4, 0, 0), Vec3(0, 4, 0), Vec3(0, 0, 4));
-    system.addForce(nonbonded);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(3);
-    positions[0] = Vec3(0, 0, 0);
-    positions[1] = Vec3(2, 0, 0);
-    positions[2] = Vec3(3, 0, 0);  // 3 from particle 0 directly, but 1 through the periodic boundary of the side-4 box
-    context.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-    const vector<Vec3>& forces = state.getForces();
-    const double eps = 78.3;  // NOTE(review): never set on the force in this function; presumably the default reaction-field dielectric - confirm
-    const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
-    const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
-    const double force = ONE_4PI_EPS0*(1.0)*(1.0-2.0*krf*1.0);  // pair force at distance 1
-    ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[0], TOL);
-    ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[1], TOL);
-    ASSERT_EQUAL_VEC(Vec3(0, 0, 0), forces[2], TOL);  // particle 2 is expected to feel equal and opposite forces from its two neighbours
-    ASSERT_EQUAL_TOL(2*ONE_4PI_EPS0*(1.0)*(1.0+krf*1.0-crf), state.getPotentialEnergy(), TOL);  // two interacting pairs, each at distance 1
-}
-
-void testTriclinic() {  // For 50 random placements of two charges in a triclinic box, compares energy and forces with values computed from a brute-force nearest-image search.
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    Vec3 a(3.1, 0, 0);
-    Vec3 b(0.4, 3.5, 0);
-    Vec3 c(-0.1, -0.5, 4.0);
-    system.setDefaultPeriodicBoxVectors(a, b, c);
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* nonbonded = new NonbondedForce();
-    nonbonded->addParticle(1.0, 1, 0);
-    nonbonded->addParticle(1.0, 1, 0);
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
-    const double cutoff = 1.5;
-    nonbonded->setCutoffDistance(cutoff);
-    system.addForce(nonbonded);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(2);
-    OpenMM_SFMT::SFMT sfmt;
-    init_gen_rand(0, sfmt);  // fixed seed 0
-    const double eps = 78.3;  // NOTE(review): never set on the force in this function; presumably the default reaction-field dielectric - confirm
-    const double krf = (1.0/(cutoff*cutoff*cutoff))*(eps-1.0)/(2.0*eps+1.0);
-    const double crf = (1.0/cutoff)*(3.0*eps)/(2.0*eps+1.0);
-    for (int iteration = 0; iteration < 50; iteration++) {
-        // Generate random positions for the two particles.
-
-        positions[0] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);  // random combination of the three box vectors
-        positions[1] = a*genrand_real2(sfmt) + b*genrand_real2(sfmt) + c*genrand_real2(sfmt);
-        context.setPositions(positions);
-
-        // Loop over all possible periodic copies and find the nearest one.
-
-        Vec3 delta;
-        double distance2 = 100.0;  // starting value for the smallest squared distance found so far
-        for (int i = -1; i < 2; i++)
-            for (int j = -1; j < 2; j++)
-                for (int k = -1; k < 2; k++) {  // the 27 image offsets with each index in -1, 0, 1
-                    Vec3 d = positions[1]-positions[0]+a*i+b*j+c*k;
-                    if (d.dot(d) < distance2) {
-                        delta = d;
-                        distance2 = d.dot(d);
-                    }
-                }
-        double distance = sqrt(distance2);
-
-        // See if the force and energy are correct.
-
-        State state = context.getState(State::Forces | State::Energy);
-        if (distance >= cutoff) {
-            ASSERT_EQUAL(0.0, state.getPotentialEnergy());  // beyond the cutoff both energy and forces must be exactly zero
-            ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[0], 0);
-            ASSERT_EQUAL_VEC(Vec3(0, 0, 0), state.getForces()[1], 0);
-        }
-        else {
-            const Vec3 force = delta*ONE_4PI_EPS0*(-1.0/(distance*distance*distance)+2.0*krf);  // expected force on particle 0, directed along delta
-            ASSERT_EQUAL_TOL(ONE_4PI_EPS0*(1.0/distance+krf*distance*distance-crf), state.getPotentialEnergy(), 1e-4);
-            ASSERT_EQUAL_VEC(force, state.getForces()[0], 2e-5);
-            ASSERT_EQUAL_VEC(-force, state.getForces()[1], 2e-5);
-        }
-    }
-}
-
-void testLargeSystem() {  // Builds 600 two-particle molecules and checks that the global platform and ReferencePlatform agree, first without and then with periodic boundaries.
-    const int numMolecules = 600;
-    const int numParticles = numMolecules*2;
-    const double cutoff = 2.0;
-    const double boxSize = 20.0;
-    const double tol = 2e-3;
-    ReferencePlatform reference;
-    System system;
-    for (int i = 0; i < numParticles; i++)
-        system.addParticle(1.0);
-    NonbondedForce* nonbonded = new NonbondedForce();
-    HarmonicBondForce* bonds = new HarmonicBondForce();
-    vector<Vec3> positions(numParticles);
-    vector<Vec3> velocities(numParticles);
-    OpenMM_SFMT::SFMT sfmt;
-    init_gen_rand(0, sfmt);  // fixed seed 0
-
-    for (int i = 0; i < numMolecules; i++) {
-        if (i < numMolecules/2) {  // the two halves of the molecules differ only in the last addParticle argument (0.1 versus 0.2)
-            nonbonded->addParticle(-1.0, 0.2, 0.1);
-            nonbonded->addParticle(1.0, 0.1, 0.1);
-        }
-        else {
-            nonbonded->addParticle(-1.0, 0.2, 0.2);
-            nonbonded->addParticle(1.0, 0.1, 0.2);
-        }
-        positions[2*i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
-        positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);  // partner particle is offset by 1.0 along x
-        velocities[2*i] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
-        velocities[2*i+1] = Vec3(genrand_real2(sfmt), genrand_real2(sfmt), genrand_real2(sfmt));
-        bonds->addBond(2*i, 2*i+1, 1.0, 0.1);
-        nonbonded->addException(2*i, 2*i+1, 0.0, 0.15, 0.0);  // exception for the bonded pair with zeros in the third and fifth arguments
-    }
-
-    // Try with cutoffs but not periodic boundary conditions, and make sure the CPU and Reference
-    // platforms agree.
-
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
-    nonbonded->setCutoffDistance(cutoff);
-    system.addForce(nonbonded);
-    system.addForce(bonds);
-    VerletIntegrator integrator1(0.01);
-    VerletIntegrator integrator2(0.01);
-    Context cpuContext(system, integrator1, platform);
-    Context referenceContext(system, integrator2, reference);
-    cpuContext.setPositions(positions);
-    cpuContext.setVelocities(velocities);
-    referenceContext.setPositions(positions);
-    referenceContext.setVelocities(velocities);
-    State cpuState = cpuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
-    State referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
-    for (int i = 0; i < numParticles; i++) {
-        ASSERT_EQUAL_VEC(cpuState.getPositions()[i], referenceState.getPositions()[i], tol);
-        ASSERT_EQUAL_VEC(cpuState.getVelocities()[i], referenceState.getVelocities()[i], tol);
-        ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
-    }
-    ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
-
-    // Now do the same thing with periodic boundary conditions.
-
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
-    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
-    cpuContext.reinitialize();  // both contexts are reinitialized after the System and force were modified
-    referenceContext.reinitialize();
-    cpuContext.setPositions(positions);
-    cpuContext.setVelocities(velocities);
-    referenceContext.setPositions(positions);
-    referenceContext.setVelocities(velocities);
-    cpuState = cpuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
-    referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
-    for (int i = 0; i < numParticles; i++) {
-        double dx = cpuState.getPositions()[i][0]-referenceState.getPositions()[i][0];  // NOTE(review): dx, dy and dz are computed but never used; the asserts below recompute the differences
-        double dy = cpuState.getPositions()[i][1]-referenceState.getPositions()[i][1];
-        double dz = cpuState.getPositions()[i][2]-referenceState.getPositions()[i][2];
-        ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);  // positions may differ by whole multiples of the box size
-        ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
-        ASSERT_EQUAL_TOL(fmod(cpuState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
-        ASSERT_EQUAL_VEC(cpuState.getVelocities()[i], referenceState.getVelocities()[i], tol);
-        ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
-    }
-    ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
-}
-
-void testDispersionCorrection() {  // Checks that the energy difference between dispersion correction on and off matches a closed-form value, for one and then two particle types.
-    // Create a box full of identical particles.
-
-    int gridSize = 5;
-    int numParticles = gridSize*gridSize*gridSize;  // 125 particles on a 5x5x5 grid
-    double boxSize = gridSize*0.7;
-    double cutoff = boxSize/3;
-    System system;
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* nonbonded = new NonbondedForce();
-    vector<Vec3> positions(numParticles);
-    int index = 0;
-    for (int i = 0; i < gridSize; i++)
-        for (int j = 0; j < gridSize; j++)
-            for (int k = 0; k < gridSize; k++) {
-                system.addParticle(1.0);
-                nonbonded->addParticle(0, 1.1, 0.5);  // the values 1.1 and 0.5 reappear in the expected-value formulas below
-                positions[index] = Vec3(i*boxSize/gridSize, j*boxSize/gridSize, k*boxSize/gridSize);
-                index++;
-            }
-    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
-    nonbonded->setCutoffDistance(cutoff);
-    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
-    system.addForce(nonbonded);
-
-    // See if the correction has the correct value.
-
-    Context context(system, integrator, platform);
-    context.setPositions(positions);
-    double energy1 = context.getState(State::Energy).getPotentialEnergy();  // NOTE(review): taken before the correction is switched off below, so it is presumably enabled by default - confirm
-    nonbonded->setUseDispersionCorrection(false);
-    context.reinitialize();
-    context.setPositions(positions);
-    double energy2 = context.getState(State::Energy).getPotentialEnergy();  // energy with the correction disabled
-    double term1 = (0.5*pow(1.1, 12)/pow(cutoff, 9))/9;
-    double term2 = (0.5*pow(1.1, 6)/pow(cutoff, 3))/3;
-    double expected = 8*M_PI*numParticles*numParticles*(term1-term2)/(boxSize*boxSize*boxSize);
-    ASSERT_EQUAL_TOL(expected, energy1-energy2, 1e-4);
-
-    // Now modify half the particles to be different, and see if it is still correct.
-
-    int numType2 = 0;
-    for (int i = 0; i < numParticles; i += 2) {
-        nonbonded->setParticleParameters(i, 0, 1, 1);  // every even-indexed particle becomes the second type
-        numType2++;
-    }
-    int numType1 = numParticles-numType2;
-    nonbonded->updateParametersInContext(context);
-    energy2 = context.getState(State::Energy).getPotentialEnergy();  // correction is still disabled at this point
-    nonbonded->setUseDispersionCorrection(true);
-    context.reinitialize();
-    context.setPositions(positions);
-    energy1 = context.getState(State::Energy).getPotentialEnergy();
-    term1 = ((numType1*(numType1+1))/2)*(0.5*pow(1.1, 12)/pow(cutoff, 9))/9;  // contribution weighted by the count of type1-type1 pairs, self pairs included
-    term2 = ((numType1*(numType1+1))/2)*(0.5*pow(1.1, 6)/pow(cutoff, 3))/3;
-    term1 += ((numType2*(numType2+1))/2)*(1*pow(1.0, 12)/pow(cutoff, 9))/9;  // type2-type2 pairs
-    term2 += ((numType2*(numType2+1))/2)*(1*pow(1.0, 6)/pow(cutoff, 3))/3;
-    double combinedSigma = 0.5*(1+1.1);  // arithmetic mean of 1 and 1.1
-    double combinedEpsilon = sqrt(1*0.5);  // geometric mean of 1 and 0.5
-    term1 += (numType1*numType2)*(combinedEpsilon*pow(combinedSigma, 12)/pow(cutoff, 9))/9;  // mixed type1-type2 pairs
-    term2 += (numType1*numType2)*(combinedEpsilon*pow(combinedSigma, 6)/pow(cutoff, 3))/3;
-    term1 /= (numParticles*(numParticles+1))/2;  // divide by the total pair count; the divisor is computed in integer arithmetic
-    term2 /= (numParticles*(numParticles+1))/2;
-    expected = 8*M_PI*numParticles*numParticles*(term1-term2)/(boxSize*boxSize*boxSize);
-    ASSERT_EQUAL_TOL(expected, energy1-energy2, 1e-4);
-}
-
-void testChangingParameters() {
-    const int numMolecules = 600;
-    const int numParticles = numMolecules*2;
-    const double cutoff = 2.0;
-    const double boxSize = 20.0;
-    const double tol = 2e-3;
-    ReferencePlatform reference;
-    System system;
-    for (int i = 0; i < numParticles; i++)
-        system.addParticle(1.0);
-    NonbondedForce* nonbonded = new NonbondedForce();
-    vector<Vec3> positions(numParticles);
-    OpenMM_SFMT::SFMT sfmt;
-    init_gen_rand(0, sfmt);
-
-    for (int i = 0; i < numMolecules; i++) {
-        if (i < numMolecules/2) {
-            nonbonded->addParticle(-1.0, 0.2, 0.1);
-            nonbonded->addParticle(1.0, 0.1, 0.1);
-        }
-        else {
-            nonbonded->addParticle(-1.0, 0.2, 0.2);
-            nonbonded->addParticle(1.0, 0.1, 0.2);
-        }
-        positions[2*i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
-        positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
-        system.addConstraint(2*i, 2*i+1, 1.0);
-        nonbonded->addException(2*i, 2*i+1, 0.0, 0.15, 0.0);
-    }
-    nonbonded->setNonbondedMethod(NonbondedForce::PME);
-    nonbonded->setCutoffDistance(cutoff);
-    system.addForce(nonbonded);
-    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
-    
-    // See if Reference and CPU give the same forces and energies.
-    
-    VerletIntegrator integrator1(0.01);
-    VerletIntegrator integrator2(0.01);
-    Context cpuContext(system, integrator1, platform);
-    Context referenceContext(system, integrator2, reference);
-    cpuContext.setPositions(positions);
-    referenceContext.setPositions(positions);
-    State cpuState = cpuContext.getState(State::Forces | State::Energy);
-    State referenceState = referenceContext.getState(State::Forces | State::Energy);
-    for (int i = 0; i < numParticles; i++)
-        ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
-    ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
-    
-    // Now modify parameters and see if they still agree.
-
-    for (int i = 0; i < numParticles; i += 5) {
-        double charge, sigma, epsilon;
-        nonbonded->getParticleParameters(i, charge, sigma, epsilon);
-        nonbonded->setParticleParameters(i, 1.5*charge, 1.1*sigma, 1.7*epsilon);
-    }
-    nonbonded->updateParametersInContext(cpuContext);
-    nonbonded->updateParametersInContext(referenceContext);
-    cpuState = cpuContext.getState(State::Forces | State::Energy);
-    referenceState = referenceContext.getState(State::Forces | State::Energy);
-    for (int i = 0; i < numParticles; i++)
-        ASSERT_EQUAL_VEC(cpuState.getForces()[i], referenceState.getForces()[i], tol);
-    ASSERT_EQUAL_TOL(cpuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
-}
-
-void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
-    System system;
-    system.setDefaultPeriodicBoxVectors(Vec3(6, 0, 0), Vec3(0, 6, 0), Vec3(0, 0, 6));
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    VerletIntegrator integrator(0.01);
-    NonbondedForce* nonbonded = new NonbondedForce();
-    nonbonded->addParticle(0, 1.2, 1);
-    nonbonded->addParticle(0, 1.4, 2);
-    nonbonded->setNonbondedMethod(method);
-    nonbonded->setCutoffDistance(2.0);
-    nonbonded->setUseSwitchingFunction(true);
-    nonbonded->setSwitchingDistance(1.5);
-    nonbonded->setUseDispersionCorrection(false);
-    system.addForce(nonbonded);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(2);
-    positions[0] = Vec3(0, 0, 0);
-    double eps = SQRT_TWO;
-    
-    // Compute the interaction at various distances.
-    
-    for (double r = 1.0; r < 2.5; r += 0.1) {
-        positions[1] = Vec3(r, 0, 0);
-        context.setPositions(positions);
-        State state = context.getState(State::Forces | State::Energy);
-        
-        // See if the energy is correct.
-        
-        double x = 1.3/r;
-        double expectedEnergy = 4.0*eps*(std::pow(x, 12.0)-std::pow(x, 6.0));
-        double switchValue;
-        if (r <= 1.5)
-            switchValue = 1;
-        else if (r >= 2.0)
-            switchValue = 0;
-        else {
-            double t = (r-1.5)/0.5;
-            switchValue = 1+t*t*t*(-10+t*(15-t*6));
-        }
-        ASSERT_EQUAL_TOL(switchValue*expectedEnergy, state.getPotentialEnergy(), TOL);
-        
-        // See if the force is the gradient of the energy.
-        
-        double delta = 1e-3;
-        positions[1] = Vec3(r-delta, 0, 0);
-        context.setPositions(positions);
-        double e1 = context.getState(State::Energy).getPotentialEnergy();
-        positions[1] = Vec3(r+delta, 0, 0);
-        context.setPositions(positions);
-        double e2 = context.getState(State::Energy).getPotentialEnergy();
-        ASSERT_EQUAL_TOL((e2-e1)/(2*delta), state.getForces()[0][0], 1e-3);
-    }
-}
-
-int main(int argc, char* argv[]) {
-    try {
-        if (!CpuPlatform::isProcessorSupported()) {
-            cout << "CPU is not supported.  Exiting." << endl;
-            return 0;
-        }
-        testCoulomb();
-        testLJ();
-        testExclusionsAnd14();
-        testCutoff();
-        testCutoff14();
-        testPeriodic();
-        testTriclinic();
-        testLargeSystem();
-        testDispersionCorrection();
-        testChangingParameters();
-        testSwitchingFunction(NonbondedForce::CutoffNonPeriodic);
-        testSwitchingFunction(NonbondedForce::PME);
-    }
-    catch(const exception& e) {
-        cout << "exception: " << e.what() << endl;
-        return 1;
-    }
-    cout << "Done" << endl;
-    return 0;
+void runPlatformTests() {
 }
--- a/platforms/cpu/tests/TestCpuPeriodicTorsionForce.cpp
+++ b/platforms/cpu/tests/TestCpuPeriodicTorsionForce.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2014 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -29,69 +29,8 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

-/**
- * This tests the CPU implementation of PeriodicTorsionForce.
- */
-
-#include "openmm/internal/AssertionUtilities.h"
-#include "openmm/Context.h"
-#include "CpuPlatform.h"
-#include "openmm/PeriodicTorsionForce.h"
-#include "openmm/System.h"
-#include "openmm/VerletIntegrator.h"
-#include "SimTKOpenMMRealType.h"
-#include <iostream>
-#include <vector>
-
-using namespace OpenMM;
-using namespace std;
-
-CpuPlatform platform;
-
-const double TOL = 1e-5;
-
-void testPeriodicTorsions() {
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    VerletIntegrator integrator(0.01);
-    PeriodicTorsionForce* forceField = new PeriodicTorsionForce();
-    forceField->addTorsion(0, 1, 2, 3, 2, PI_M/3, 1.1);
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(4);
-    positions[0] = Vec3(0, 1, 0);
-    positions[1] = Vec3(0, 0, 0);
-    positions[2] = Vec3(1, 0, 0);
-    positions[3] = Vec3(1, 0, 2);
-    context.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-    {
-        const vector<Vec3>& forces = state.getForces();
-        double torque = -2*1.1*std::sin(2*PI_M/3);
-        ASSERT_EQUAL_VEC(Vec3(0, 0, torque), forces[0], TOL);
-        ASSERT_EQUAL_VEC(Vec3(0, 0.5*torque, 0), forces[3], TOL);
-        ASSERT_EQUAL_VEC(Vec3(forces[0][0]+forces[1][0]+forces[2][0]+forces[3][0], forces[0][1]+forces[1][1]+forces[2][1]+forces[3][1], forces[0][2]+forces[1][2]+forces[2][2]+forces[3][2]), Vec3(0, 0, 0), TOL);
-        ASSERT_EQUAL_TOL(1.1*(1+std::cos(2*PI_M/3)), state.getPotentialEnergy(), TOL);
-    }
-    
-    // Try changing the torsion parameters and make sure it's still correct.
-    
-    forceField->setTorsionParameters(0, 0, 1, 2, 3, 3, PI_M/3.2, 1.3);
-    forceField->updateParametersInContext(context);
-    state = context.getState(State::Forces | State::Energy);
-    {
-        const vector<Vec3>& forces = state.getForces();
-        double dtheta = (3*PI_M/2)-(PI_M/3.2);
-        double torque = -3*1.3*std::sin(dtheta);
-        ASSERT_EQUAL_VEC(Vec3(0, 0, torque), forces[0], TOL);
-        ASSERT_EQUAL_VEC(Vec3(0, 0.5*torque, 0), forces[3], TOL);
-        ASSERT_EQUAL_VEC(Vec3(forces[0][0]+forces[1][0]+forces[2][0]+forces[3][0], forces[0][1]+forces[1][1]+forces[2][1]+forces[3][1], forces[0][2]+forces[1][2]+forces[2][2]+forces[3][2]), Vec3(0, 0, 0), TOL);
-        ASSERT_EQUAL_TOL(1.3*(1+std::cos(dtheta)), state.getPotentialEnergy(), TOL);
-    }
-}
+#include "CpuTests.h"
+#include "TestPeriodicTorsionForce.h"

 void testParallelComputation() {
    System system;
@@ -119,15 +58,6 @@ void testParallelComputation() {
        ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5);
 }

-int main(int argc, char* argv[]) {
-    try {
-        testPeriodicTorsions();
-        testParallelComputation();
-    }
-    catch(const exception& e) {
-        cout << "exception: " << e.what() << endl;
-        return 1;
-    }
-    cout << "Done" << endl;
-    return 0;
+void runPlatformTests() {
+    testParallelComputation();
 }
--- a/platforms/cpu/tests/TestCpuRBTorsionForce.cpp
+++ b/platforms/cpu/tests/TestCpuRBTorsionForce.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2014 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -29,88 +29,8 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

-/**
- * This tests the CUDA implementation of RBTorsionForce.
- */
-
-#include "openmm/internal/AssertionUtilities.h"
-#include "openmm/Context.h"
-#include "CpuPlatform.h"
-#include "openmm/RBTorsionForce.h"
-#include "openmm/System.h"
-#include "openmm/VerletIntegrator.h"
-#include "SimTKOpenMMRealType.h"
-#include <iostream>
-#include <vector>
-
-using namespace OpenMM;
-using namespace std;
-
-CpuPlatform platform;
-
-const double TOL = 1e-5;
-
-void testRBTorsions() {
-    System system;
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    system.addParticle(1.0);
-    VerletIntegrator integrator(0.01);
-    RBTorsionForce* forceField = new RBTorsionForce();
-    forceField->addTorsion(0, 1, 2, 3, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6);
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(4);
-    positions[0] = Vec3(0, 1, 0);
-    positions[1] = Vec3(0, 0, 0);
-    positions[2] = Vec3(1, 0, 0);
-    positions[3] = Vec3(1, 1, 1);
-    context.setPositions(positions);
-    State state = context.getState(State::Forces | State::Energy);
-    {
-        const vector<Vec3>& forces = state.getForces();
-        double psi = 0.25*PI_M - PI_M;
-        double torque = 0.0;
-        for (int i = 1; i < 6; ++i) {
-            double c = 0.1*(i+1);
-            torque += -c*i*std::pow(std::cos(psi), i-1)*std::sin(psi);
-        }
-        ASSERT_EQUAL_VEC(Vec3(0, 0, torque), forces[0], TOL);
-        ASSERT_EQUAL_VEC(Vec3(0, 0.5*torque, -0.5*torque), forces[3], TOL);
-        ASSERT_EQUAL_VEC(Vec3(forces[0][0]+forces[1][0]+forces[2][0]+forces[3][0], forces[0][1]+forces[1][1]+forces[2][1]+forces[3][1], forces[0][2]+forces[1][2]+forces[2][2]+forces[3][2]), Vec3(0, 0, 0), TOL);
-        double energy = 0.0;
-        for (int i = 0; i < 6; ++i) {
-            double c = 0.1*(i+1);
-            energy += c*std::pow(std::cos(psi), i);
-        }
-        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
-    }
-    
-    // Try changing the torsion parameters and make sure it's still correct.
-    
-    forceField->setTorsionParameters(0, 0, 1, 2, 3, 0.11, 0.22, 0.33, 0.44, 0.55, 0.66);
-    forceField->updateParametersInContext(context);
-    state = context.getState(State::Forces | State::Energy);
-    {
-        const vector<Vec3>& forces = state.getForces();
-        double psi = 0.25*PI_M - PI_M;
-        double torque = 0.0;
-        for (int i = 1; i < 6; ++i) {
-            double c = 0.11*(i+1);
-            torque += -c*i*std::pow(std::cos(psi), i-1)*std::sin(psi);
-        }
-        ASSERT_EQUAL_VEC(Vec3(0, 0, torque), forces[0], TOL);
-        ASSERT_EQUAL_VEC(Vec3(0, 0.5*torque, -0.5*torque), forces[3], TOL);
-        ASSERT_EQUAL_VEC(Vec3(forces[0][0]+forces[1][0]+forces[2][0]+forces[3][0], forces[0][1]+forces[1][1]+forces[2][1]+forces[3][1], forces[0][2]+forces[1][2]+forces[2][2]+forces[3][2]), Vec3(0, 0, 0), TOL);
-        double energy = 0.0;
-        for (int i = 0; i < 6; ++i) {
-            double c = 0.11*(i+1);
-            energy += c*std::pow(std::cos(psi), i);
-        }
-        ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), TOL);
-    }
-}
+#include "CpuTests.h"
+#include "TestRBTorsionForce.h"

 void testParallelComputation() {
    System system;
@@ -138,15 +58,6 @@ void testParallelComputation() {
        ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-5);
 }

-int main(int argc, char* argv[]) {
-    try {
-        testRBTorsions();
-        testParallelComputation();
-    }
-    catch(const exception& e) {
-        cout << "exception: " << e.what() << endl;
-        return 1;
-    }
-    cout << "Done" << endl;
-    return 0;
+void runPlatformTests() {
+    testParallelComputation();
 }
--- a/platforms/cpu/tests/TestCpuSettle.cpp
+++ b/platforms/cpu/tests/TestCpuSettle.cpp
-
 /* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
 * -------------------------------------------------------------------------- *
@@ -7,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2015 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -30,91 +29,8 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

-/**
- * This tests the CPU implementation of the SETTLE algorithm.
- */
-
-#include "openmm/internal/AssertionUtilities.h"
-#include "openmm/Context.h"
-#include "CpuPlatform.h"
-#include "openmm/NonbondedForce.h"
-#include "openmm/System.h"
-#include "openmm/LangevinIntegrator.h"
-#include "sfmt/SFMT.h"
-#include <iostream>
-#include <vector>
-
-using namespace OpenMM;
-using namespace std;
-
-void testConstraints() {
-    const int numMolecules = 10;
-    const int numParticles = numMolecules*3;
-    const int numConstraints = numMolecules*3;
-    const double temp = 100.0;
-    CpuPlatform platform;
-    System system;
-    LangevinIntegrator integrator(temp, 2.0, 0.001);
-    integrator.setConstraintTolerance(1e-5);
-    NonbondedForce* forceField = new NonbondedForce();
-    for (int i = 0; i < numMolecules; ++i) {
-        system.addParticle(16.0);
-        system.addParticle(1.0);
-        system.addParticle(1.0);
-        forceField->addParticle(-0.82, 0.317, 0.65);
-        forceField->addParticle(0.41, 1.0, 0.0);
-        forceField->addParticle(0.41, 1.0, 0.0);
-        system.addConstraint(i*3, i*3+1, 0.1);
-        system.addConstraint(i*3, i*3+2, 0.1);
-        system.addConstraint(i*3+1, i*3+2, 0.163);
-    }
-    system.addForce(forceField);
-    Context context(system, integrator, platform);
-    vector<Vec3> positions(numParticles);
-    vector<Vec3> velocities(numParticles);
-    OpenMM_SFMT::SFMT sfmt;
-    init_gen_rand(0, sfmt);
-
-    for (int i = 0; i < numMolecules; ++i) {
-        positions[i*3] = Vec3((i%4)*0.4, (i/4)*0.4, 0);
-        positions[i*3+1] = positions[i*3]+Vec3(0.1, 0, 0);
-        positions[i*3+2] = positions[i*3]+Vec3(-0.03333, 0.09428, 0);
-        velocities[i*3] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
-        velocities[i*3+1] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
-        velocities[i*3+2] = Vec3(genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5, genrand_real2(sfmt)-0.5);
-    }
-    context.setPositions(positions);
-    context.setVelocities(velocities);
-
-    // Simulate it and see whether the constraints remain satisfied.
-
-    for (int i = 0; i < 1000; ++i) {
-        integrator.step(1);
-        State state = context.getState(State::Positions | State::Forces);
-        for (int j = 0; j < numConstraints; ++j) {
-            int particle1, particle2;
-            double distance;
-            system.getConstraintParameters(j, particle1, particle2, distance);
-            Vec3 p1 = state.getPositions()[particle1];
-            Vec3 p2 = state.getPositions()[particle2];
-            double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
-            ASSERT_EQUAL_TOL(distance, dist, 1e-5);
-        }
-    }
-}
+#include "CpuTests.h"
+#include "TestSettle.h"

-int main(int argc, char* argv[]) {
-    try {
-        if (!CpuPlatform::isProcessorSupported()) {
-            cout << "CPU is not supported.  Exiting." << endl;
-            return 0;
-        }
-        testConstraints();
-    }
-    catch(const exception& e) {
-        cout << "exception: " << e.what() << endl;
-        return 1;
-    }
-    cout << "Done" << endl;
-    return 0;
+void runPlatformTests() {
 }
--- a/platforms/cuda/CMakeLists.txt
+++ b/platforms/cuda/CMakeLists.txt
@@ -13,9 +13,9 @@
 #----------------------------------------------------

 set(OPENMM_BUILD_CUDA_TESTS TRUE CACHE BOOL "Whether to build CUDA test cases")
-if(OPENMM_BUILD_CUDA_TESTS)
+if(BUILD_TESTING AND OPENMM_BUILD_CUDA_TESTS)
    SUBDIRS (tests)
-endif(OPENMM_BUILD_CUDA_TESTS)
+endif(BUILD_TESTING AND OPENMM_BUILD_CUDA_TESTS)

 # The source is organized into subdirectories, but we handle them all from
 # this CMakeLists file rather than letting CMake visit them as SUBDIRS.

--- a/platforms/cuda/include/CudaContext.h
+++ b/platforms/cuda/include/CudaContext.h
@@ -30,6 +30,7 @@
 #include <map>
 #include <queue>
 #include <string>
+#include <utility>
 #define __CL_ENABLE_EXCEPTIONS
 #ifdef _MSC_VER
    // Prevent Windows from defining macros that interfere with other code.
@@ -538,6 +539,11 @@ public:
     */
    void invalidateMolecules();
 private:
+    /**
+     * Compute a sorted list of device indices in decreasing order of desirability
+     */
+    std::vector<int> getDevicePrecedence();
+
    struct Molecule;
    struct MoleculeGroup;
    class VirtualSiteInfo;

--- a/platforms/cuda/include/CudaExpressionUtilities.h
+++ b/platforms/cuda/include/CudaExpressionUtilities.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2009-2014 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -89,6 +89,10 @@ public:
     * @param function   the function for which to get a placeholder
     */
    Lepton::CustomFunction* getFunctionPlaceholder(const TabulatedFunction& function);
+    /**
+     * Get a Lepton::CustomFunction that can be used to represent the periodicdistance() function when parsing expressions.
+     */
+    Lepton::CustomFunction* getPeriodicDistancePlaceholder();
 private:
    class FunctionPlaceholder : public Lepton::CustomFunction {
        public:
@@ -114,13 +118,13 @@ private:
            const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames,
            const std::string& prefix, const std::vector<std::vector<double> >& functionParams, const std::vector<Lepton::ParsedExpression>& allExpressions, const std::string& tempType);
    std::string getTempName(const Lepton::ExpressionTreeNode& node, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps);
-    void findRelatedTabulatedFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
+    void findRelatedCustomFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
            std::vector<const Lepton::ExpressionTreeNode*>& nodes);
    void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
            std::map<int, const Lepton::ExpressionTreeNode*>& powers);
    std::vector<std::vector<double> > computeFunctionParameters(const std::vector<const TabulatedFunction*>& functions);
    CudaContext& context;
-    FunctionPlaceholder fp1, fp2, fp3;
+    FunctionPlaceholder fp1, fp2, fp3, periodicDistance;
 };

 } // namespace OpenMM

--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
@@ -620,6 +620,15 @@ public:
     * @param force      the NonbondedForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
+    /**
+     * Get the parameters being used for PME.
+     * 
+     * @param alpha   the separation parameter
+     * @param nx      the number of grid points along the X axis
+     * @param ny      the number of grid points along the Y axis
+     * @param nz      the number of grid points along the Z axis
+     */
+    void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
 private:
    class SortTrait : public CudaSort::SortTrait {
        int getDataSize() const {return 8;}
@@ -668,7 +677,9 @@ private:
    std::vector<std::pair<int, int> > exceptionAtoms;
    double ewaldSelfEnergy, dispersionCoefficient, alpha;
    int interpolateForceThreads;
+    int gridSizeX, gridSizeY, gridSizeZ;
    bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT;
+    NonbondedMethod nonbondedMethod;
    static const int PmeOrder = 5;
 };

@@ -922,6 +933,58 @@ private:
    CUfunction donorKernel, acceptorKernel;
 };

+/**
+ * This kernel is invoked by CustomCentroidBondForce to calculate the forces acting on the system.
+ */
+class CudaCalcCustomCentroidBondForceKernel : public CalcCustomCentroidBondForceKernel {
+public:
+    CudaCalcCustomCentroidBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCentroidBondForceKernel(name, platform),
+            cu(cu), params(NULL), globals(NULL), groupParticles(NULL), groupWeights(NULL), groupOffsets(NULL), groupForces(NULL), bondGroups(NULL), centerPositions(NULL), system(system) {
+    }
+    ~CudaCalcCustomCentroidBondForceKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param force      the CustomCentroidBondForce this kernel will be used for
+     */
+    void initialize(const System& system, const CustomCentroidBondForce& force);
+    /**
+     * Execute the kernel to calculate the forces and/or energy.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param includeForces  true if forces should be calculated
+     * @param includeEnergy  true if the energy should be calculated
+     * @return the potential energy due to the force
+     */
+    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
+    /**
+     * Copy changed parameters over to a context.
+     *
+     * @param context    the context to copy parameters to
+     * @param force      the CustomCentroidBondForce to copy the parameters from
+     */
+    void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force);
+
+private:
+    int numGroups, numBonds;
+    CudaContext& cu;
+    CudaParameterSet* params;
+    CudaArray* globals;
+    CudaArray* groupParticles;
+    CudaArray* groupWeights;
+    CudaArray* groupOffsets;
+    CudaArray* groupForces;
+    CudaArray* bondGroups;
+    CudaArray* centerPositions;
+    std::vector<std::string> globalParamNames;
+    std::vector<float> globalParamValues;
+    std::vector<CudaArray*> tabulatedFunctions;
+    std::vector<void*> groupForcesArgs;
+    CUfunction computeCentersKernel, groupForcesKernel, applyForcesKernel;
+    const System& system;
+};
+
 /**
 * This kernel is invoked by CustomCompoundBondForce to calculate the forces acting on the system.
 */

--- a/platforms/cuda/include/CudaNonbondedUtilities.h
+++ b/platforms/cuda/include/CudaNonbondedUtilities.h
@@ -138,12 +138,18 @@ public:
    void prepareInteractions(int forceGroups);
    /**
     * Compute the nonbonded interactions.
+     * 
+     * @param forceGroups    the flags specifying which force groups to include
+     * @param includeForces  whether to compute forces
+     * @param includeEnergy  whether to compute the potential energy
     */
-    void computeInteractions(int forceGroups);
+    void computeInteractions(int forceGroups, bool includeForces, bool includeEnergy);
    /**
     * Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
+     *
+     * @return true if the neighbor list needed to be enlarged.
     */
-    void updateNeighborListSize();
+    bool updateNeighborListSize();
    /**
     * Get the array containing the center of each atom block.
     */
@@ -233,8 +239,10 @@ public:
     * @param useExclusions specifies whether exclusions are applied to this interaction
     * @param isSymmetric   specifies whether the interaction is symmetric
     * @param groups        the set of force groups this kernel is for
+     * @param includeForces whether this kernel should compute forces
+     * @param includeEnergy whether this kernel should compute potential energy
     */
-    CUfunction createInteractionKernel(const std::string& source, std::vector<ParameterInfo>& params, std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups);
+    CUfunction createInteractionKernel(const std::string& source, std::vector<ParameterInfo>& params, std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups, bool includeForces, bool includeEnergy);
    /**
     * Create the set of kernels that will be needed for a particular combination of force groups.
     * 
@@ -280,7 +288,8 @@ class CudaNonbondedUtilities::KernelSet {
 public:
    bool hasForces;
    double cutoffDistance;
-    CUfunction forceKernel;
+    std::string source;
+    CUfunction forceKernel, energyKernel, forceEnergyKernel;
    CUfunction findBlockBoundsKernel;
    CUfunction sortBoxDataKernel;
    CUfunction findInteractingBlocksKernel;

--- a/platforms/cuda/include/CudaParallelKernels.h
+++ b/platforms/cuda/include/CudaParallelKernels.h
@@ -430,6 +430,15 @@ public:
     * @param force      the NonbondedForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
+    /**
+     * Get the parameters being used for PME.
+     * 
+     * @param alpha   the separation parameter
+     * @param nx      the number of grid points along the X axis
+     * @param ny      the number of grid points along the Y axis
+     * @param nz      the number of grid points along the Z axis
+     */
+    void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
 private:
    class Task;
    CudaPlatform::PlatformData& data;

--- a/platforms/cuda/sharedTarget/CMakeLists.txt
+++ b/platforms/cuda/sharedTarget/CMakeLists.txt
@@ -18,7 +18,7 @@ SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE
 IF (APPLE)
    SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework CUDA")
 ELSE (APPLE)
-    SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}")
+    SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}")
 ENDIF (APPLE)

 INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
--- a/platforms/cuda/src/CudaBondedUtilities.cpp
+++ b/platforms/cuda/src/CudaBondedUtilities.cpp
@@ -99,7 +99,7 @@ void CudaBondedUtilities::initialize(const System& system) {
    s<<CudaKernelSources::vectorOps;
    for (int i = 0; i < (int) prefixCode.size(); i++)
        s<<prefixCode[i];
-    s<<"extern \"C\" __global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer, real* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups";
+    s<<"extern \"C\" __global__ void computeBondedForces(unsigned long long* __restrict__ forceBuffer, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ";
    for (int force = 0; force < numForces; force++) {
        for (int i = 0; i < (int) atomIndices[force].size(); i++) {
            int indexWidth = atomIndices[force][i]->getElementSize()/4;
@@ -110,7 +110,7 @@ void CudaBondedUtilities::initialize(const System& system) {
    for (int i = 0; i < (int) arguments.size(); i++)
        s<<", "<<argTypes[i]<<"* customArg"<<(i+1);
    s<<") {\n";
-    s<<"real energy = 0;\n";
+    s<<"mixed energy = 0;\n";
    for (int force = 0; force < numForces; force++)
        s<<createForceSource(force, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
    s<<"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;\n";
@@ -161,6 +161,11 @@ void CudaBondedUtilities::computeInteractions(int groups) {
        kernelArgs.push_back(&context.getEnergyBuffer().getDevicePointer());
        kernelArgs.push_back(&context.getPosq().getDevicePointer());
        kernelArgs.push_back(NULL);
+        kernelArgs.push_back(context.getPeriodicBoxSizePointer());
+        kernelArgs.push_back(context.getInvPeriodicBoxSizePointer());
+        kernelArgs.push_back(context.getPeriodicBoxVecXPointer());
+        kernelArgs.push_back(context.getPeriodicBoxVecYPointer());
+        kernelArgs.push_back(context.getPeriodicBoxVecZPointer());
        for (int i = 0; i < (int) atomIndices.size(); i++)
            for (int j = 0; j < (int) atomIndices[i].size(); j++)
                kernelArgs.push_back(&atomIndices[i][j]->getDevicePointer());

--- a/platforms/cuda/src/CudaContext.cpp
+++ b/platforms/cuda/src/CudaContext.cpp
@@ -120,49 +120,50 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
    int numDevices;
    string errorMessage = "Error initializing Context";
    CHECK_RESULT(cuDeviceGetCount(&numDevices));
-    if (deviceIndex < 0 || deviceIndex >= numDevices) {
-        // Try to figure out which device is the fastest.
-
-        int bestSpeed = -1;
-        int bestCompute = -1;
-        for (int i = 0; i < numDevices; i++) {
-            CHECK_RESULT(cuDeviceGet(&device, i));
-            int major, minor, clock, multiprocessors;
-            CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
-            if (major == 1 && minor < 2)
-                continue; // 1.0 and 1.1 are not supported
-            CHECK_RESULT(cuDeviceGetAttribute(&clock, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device));
-            CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
-            int speed = clock*multiprocessors;
-            if (major > bestCompute || (major == bestCompute && speed > bestSpeed)) {
-                deviceIndex = i;
-                bestSpeed = speed;
-                bestCompute = major;
-            }
+    if (deviceIndex < -1 || deviceIndex >= numDevices)
+        throw OpenMMException("Illegal value for CudaDeviceIndex: "+intToString(deviceIndex));
+
+    vector<int> devicePrecedence;
+    if (deviceIndex == -1) {
+        devicePrecedence = getDevicePrecedence();
+    } else {
+        devicePrecedence.push_back(deviceIndex);
+    }
+
+    this->deviceIndex = -1;
+    for (int i = 0; i < static_cast<int>(devicePrecedence.size()); i++) {
+        int trialDeviceIndex = devicePrecedence[i];
+        CHECK_RESULT(cuDeviceGet(&device, trialDeviceIndex));
+        defaultOptimizationOptions = "--use_fast_math";
+        unsigned int flags = CU_CTX_MAP_HOST;
+        if (useBlockingSync)
+            flags += CU_CTX_SCHED_BLOCKING_SYNC;
+        else
+            flags += CU_CTX_SCHED_SPIN;
+
+        if (cuCtxCreate(&context, flags, device) == CUDA_SUCCESS) {
+            this->deviceIndex = trialDeviceIndex;
+            break;
        }
    }
-    if (deviceIndex == -1)
-        throw OpenMMException("No compatible CUDA device is available");
-    CHECK_RESULT(cuDeviceGet(&device, deviceIndex));
-    this->deviceIndex = deviceIndex;
+    if (this->deviceIndex == -1)
+        if (deviceIndex != -1)
+            throw OpenMMException("The requested CUDA device could not be loaded");
+        else
+            throw OpenMMException("No compatible CUDA device is available");
+
    int major, minor;
    CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
-    // This is a workaround to support GTX 980 with CUDA 6.5.  It reports its compute capability
-    // as 5.2, but the compiler doesn't support anything beyond 5.0.  We can remove this once
-    // CUDA 7.0 is released.
-    if (major == 5)
-        minor = 0;
+#if __CUDA_API_VERSION < 7000
+        // This is a workaround to support GTX 980 with CUDA 6.5.  It reports
+        // its compute capability as 5.2, but the compiler doesn't support
+        // anything beyond 5.0.
+        if (major == 5)
+            minor = 0;
+#endif
    gpuArchitecture = intToString(major)+intToString(minor);
    computeCapability = major+0.1*minor;
-    if ((useDoublePrecision || useMixedPrecision) && computeCapability < 1.3)
-        throw OpenMMException("This device does not support double precision");
-    defaultOptimizationOptions = "--use_fast_math";
-    unsigned int flags = CU_CTX_MAP_HOST;
-    if (useBlockingSync)
-        flags += CU_CTX_SCHED_BLOCKING_SYNC;
-    else
-        flags += CU_CTX_SCHED_SPIN;
-    CHECK_RESULT(cuCtxCreate(&context, flags, device));
+
    contextIsValid = true;
    CHECK_RESULT(cuCtxSetCacheConfig(CU_FUNC_CACHE_PREFER_SHARED));
    if (contextIndex > 0) {
@@ -243,9 +244,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
    compilationDefines["ATAN"] = useDoublePrecision ? "atan" : "atanf";
    compilationDefines["ERF"] = useDoublePrecision ? "erf" : "erff";
    compilationDefines["ERFC"] = useDoublePrecision ? "erfc" : "erfcf";
-    
+
    // Set defines for applying periodic boundary conditions.
-    
+
    Vec3 boxVectors[3];
    system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
    boxIsTriclinic = (boxVectors[0][1] != 0.0 || boxVectors[0][2] != 0.0 ||
@@ -305,11 +306,11 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
    }

    // Create the work thread used for parallelization when running on multiple devices.
-    
+
    thread = new WorkThread();
-    
+
    // Create utilities objects.
-    
+
    bonded = new CudaBondedUtilities(*this);
    nonbonded = new CudaNonbondedUtilities(*this);
    integration = new CudaIntegrationUtilities(*this, system);
@@ -366,7 +367,7 @@ void CudaContext::initialize() {
        CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
    }
    else if (useMixedPrecision) {
-        energyBuffer = CudaArray::create<float>(*this, numEnergyBuffers, "energyBuffer");
+        energyBuffer = CudaArray::create<double>(*this, numEnergyBuffers, "energyBuffer");
        int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
        CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), 0));
    }
@@ -425,7 +426,7 @@ string CudaContext::replaceStrings(const string& input, const std::map<std::stri
            if (index != result.npos) {
                if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) && (index == result.size()-size || symbolChars.find(result[index+size]) == symbolChars.end())) {
                    // We have found a complete symbol, not part of a longer symbol.
-                    
+
                    result.replace(index, size, iter->second);
                    index += iter->second.size();
                }
@@ -460,11 +461,11 @@ static bool compileInWindows(const string &command) {
        return -1;
    }
    WaitForSingleObject(pi.hProcess, INFINITE);
-    DWORD exitCode = -1;  
+    DWORD exitCode = -1;
    if(!GetExitCodeProcess(pi.hProcess, &exitCode)) {
        throw(OpenMMException("Could not get nvcc.exe's exit code\n"));
    } else {
-        if(exitCode == 0) 
+        if(exitCode == 0)
            return 0;
        else
            return -1;
@@ -520,9 +521,9 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
    if (!defines.empty())
        src << endl;
    src << source << endl;
-    
+
    // See whether we already have PTX for this kernel cached.
-    
+
    CSHA1 sha1;
    sha1.Update((const UINT_8*) src.str().c_str(), src.str().size());
    sha1.Final();
@@ -537,9 +538,9 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
    CUmodule module;
    if (cuModuleLoad(&module, cacheFile.str().c_str()) == CUDA_SUCCESS)
        return module;
-    
+
    // Select names for the various temporary files.
-    
+
    stringstream tempFileName;
    tempFileName << "openmmTempKernel" << this; // Include a pointer to this context as part of the filename to avoid collisions.
 #ifdef WIN32
@@ -553,12 +554,12 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
    int res = 0;

    // If the runtime compiler plugin is available, use it.
-    
+
    if (hasCompilerKernel) {
        string ptx = compilerKernel.getAs<CudaCompilerKernel>().createModule(src.str(), "-arch=compute_"+gpuArchitecture+" "+options, *this);
-        
+
        // If possible, write the PTX out to a temporary file so we can cache it for later use.
-        
+
        bool wroteCache = false;
        try {
            ofstream out(outputFile.c_str());
@@ -572,7 +573,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
        }
        if (!wroteCache) {
            // An error occurred.  Possibly we don't have permission to write to the temp directory.  Just try to load the module directly.
-            
+
            CHECK_RESULT2(cuModuleLoadDataEx(&module, &ptx[0], 0, NULL, NULL), "Error loading CUDA module");
            return module;
        }
@@ -881,7 +882,7 @@ private:

 void CudaContext::findMoleculeGroups() {
    // The first time this is called, we need to identify all the molecules in the system.
-    
+
    if (moleculeGroups.size() == 0) {
        // Add a ForceInfo that makes sure reordering doesn't break virtual sites.

@@ -964,7 +965,7 @@ void CudaContext::findMoleculeGroups() {
                    if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i]))
                        identical = false;
            }
-            
+
            // See if the constraints are identical.

            for (int i = 0; i < (int) mol.constraints.size() && identical; i++) {
@@ -1045,11 +1046,11 @@ void CudaContext::invalidateMolecules() {
    }
    if (valid)
        return;
-    
+
    // The list of which molecules are identical is no longer valid.  We need to restore the
    // atoms to their original order, rebuild the list of identical molecules, and sort them
    // again.
-    
+
    vector<int4> newCellOffsets(numAtoms);
    if (useDoublePrecision) {
        vector<double4> oldPosq(paddedNumAtoms);
@@ -1194,6 +1195,8 @@ void CudaContext::reorderAtomsImpl() {
            molPos[i].x *= invNumAtoms;
            molPos[i].y *= invNumAtoms;
            molPos[i].z *= invNumAtoms;
+            if (molPos[i].x != molPos[i].x)
+                throw OpenMMException("Particle coordinate is nan");
        }
        if (nonbonded->getUsePeriodic()) {
            // Move each molecule position into the same box.
@@ -1389,3 +1392,41 @@ void CudaContext::WorkThread::flush() {
       pthread_cond_wait(&queueEmptyCondition, &queueLock);
    pthread_mutex_unlock(&queueLock);
 }
+
+// Rank the usable CUDA devices and return their indices, most preferred first (empty if no device qualifies).
+vector<int> CudaContext::getDevicePrecedence() {
+    int numDevices;
+    CUdevice thisDevice;
+    string errorMessage = "Error initializing Context"; // NOTE(review): not referenced directly below; presumably read by the CHECK_RESULT macro - confirm
+    vector<pair<pair<int, int>, int> > devices; // one ((major, speed), -index) entry per candidate device
+
+    CHECK_RESULT(cuDeviceGetCount(&numDevices));
+    for (int i = 0; i < numDevices; i++) {
+        CHECK_RESULT(cuDeviceGet(&thisDevice, i));
+        int major, minor, clock, multiprocessors, speed;
+        CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, thisDevice));
+        if (major == 1 && minor < 2)
+            continue; // compute capability 1.0 and 1.1 are not supported
+
+        if ((useDoublePrecision || useMixedPrecision) && (major+0.1*minor < 1.3))
+            continue; // double and mixed precision require compute capability 1.3 or later
+
+        CHECK_RESULT(cuDeviceGetAttribute(&clock, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, thisDevice));
+        CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, thisDevice));
+        speed = clock*multiprocessors; // speed score: clock rate times multiprocessor count
+        pair<int, int> deviceProperties = std::make_pair(major, speed);
+        devices.push_back(std::make_pair(deviceProperties, -i)); // index is negated so that lower indices win ties after the descending sort
+    }
+
+    // Sort first by major compute capability (higher is better; the minor version is not compared),
+    // then by speed (higher is better), and finally by device index (lower is better).
+    std::sort(devices.begin(), devices.end());
+    std::reverse(devices.begin(), devices.end());
+
+    vector<int> precedence;
+    for (int i = 0; i < static_cast<int>(devices.size()); i++) {
+        precedence.push_back(-devices[i].second); // undo the negation to recover the device index
+    }
+
+    return precedence;
+}
--- a/platforms/cuda/src/CudaExpressionUtilities.cpp
+++ b/platforms/cuda/src/CudaExpressionUtilities.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2009-2014 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -33,7 +33,7 @@ using namespace OpenMM;
 using namespace Lepton;
 using namespace std;

-CudaExpressionUtilities::CudaExpressionUtilities(CudaContext& context) : context(context), fp1(1), fp2(2), fp3(3) {
+CudaExpressionUtilities::CudaExpressionUtilities(CudaContext& context) : context(context), fp1(1), fp2(2), fp3(3), periodicDistance(6) { // NOTE(review): 6 presumably is the argument count of periodicdistance(x1, y1, z1, x2, y2, z2) - confirm
 }

 string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
@@ -79,11 +79,6 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
            throw OpenMMException("Unknown variable in expression: "+node.getOperation().getName());
        case Operation::CUSTOM:
        {
-            int i;
-            for (i = 0; i < (int) functionNames.size() && functionNames[i].first != node.getOperation().getName(); i++)
-                ;
-            if (i == functionNames.size())
-                throw OpenMMException("Unknown function in expression: "+node.getOperation().getName());
            out << "0.0f;\n";
            temps.push_back(make_pair(node, name));
            hasRecordedNode = true;
@@ -93,7 +88,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express

            vector<const ExpressionTreeNode*> nodes;
            for (int j = 0; j < (int) allExpressions.size(); j++)
-                findRelatedTabulatedFunctions(node, allExpressions[j].getRootNode(), nodes);
+                findRelatedCustomFunctions(node, allExpressions[j].getRootNode(), nodes);
            vector<string> nodeNames;
            nodeNames.push_back(name);
            for (int j = 1; j < (int) nodes.size(); j++) {
@@ -103,175 +98,223 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
                temps.push_back(make_pair(*nodes[j], name2));
            }
            out << "{\n";
-            vector<string> paramsFloat, paramsInt;
-            for (int j = 0; j < (int) functionParams[i].size(); j++) {
-                paramsFloat.push_back(context.doubleToString(functionParams[i][j]));
-                paramsInt.push_back(context.intToString((int) functionParams[i][j]));
-            }
-            if (dynamic_cast<const Continuous1DFunction*>(functions[i]) != NULL) {
-                out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
-                out << "if (x >= " << paramsFloat[0] << " && x <= " << paramsFloat[1] << ") {\n";
-                out << "x = (x - " << paramsFloat[0] << ")*" << paramsFloat[2] << ";\n";
-                out << "int index = (int) (floor(x));\n";
-                out << "index = min(index, (int) " << paramsInt[3] << ");\n";
-                out << "float4 coeff = " << functionNames[i].second << "[index];\n";
-                out << "real b = x-index;\n";
-                out << "real a = 1.0f-b;\n";
-                for (int j = 0; j < nodes.size(); j++) {
-                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
-                    if (derivOrder[0] == 0)
-                        out << nodeNames[j] << " = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(" << paramsFloat[2] << "*" << paramsFloat[2] << ");\n";
-                    else
-                        out << nodeNames[j] << " = (coeff.y-coeff.x)*" << paramsFloat[2] << "+((1.0f-3.0f*a*a)*coeff.z+(3.0f*b*b-1.0f)*coeff.w)/" << paramsFloat[2] << ";\n";
+            if (node.getOperation().getName() == "periodicdistance") {
+                // This is the periodicdistance() function.
+
+                out << tempType << "3 periodicDistance_delta = make_real3(";
+                for (int i = 0; i < 3; i++) {
+                    if (i > 0)
+                        out << ", ";
+                    out << getTempName(node.getChildren()[i], temps) << "-" << getTempName(node.getChildren()[i+3], temps);
                }
-                out << "}\n";
-            }
-            else if (dynamic_cast<const Continuous2DFunction*>(functions[i]) != NULL) {
-                out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
-                out << "real y = " << getTempName(node.getChildren()[1], temps) << ";\n";
-                out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
-                out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
-                out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
-                out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n";
-                out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n";
-                out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
-                out << "float4 c[4];\n";
-                for (int j = 0; j < 4; j++)
-                    out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
-                out << "real da = x-s;\n";
-                out << "real db = y-t;\n";
+                out << ");\n";
+                out << "APPLY_PERIODIC_TO_DELTA(periodicDistance_delta)\n";
+                out << tempType << " periodicDistance_r2 = periodicDistance_delta.x*periodicDistance_delta.x + periodicDistance_delta.y*periodicDistance_delta.y + periodicDistance_delta.z*periodicDistance_delta.z;\n";
+                out << tempType << " periodicDistance_rinv = RSQRT(periodicDistance_r2);\n";
                for (int j = 0; j < nodes.size(); j++) {
                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
-                    if (derivOrder[0] == 0 && derivOrder[1] == 0) {
-                        out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;\n";
-                        out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;\n";
-                        out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;\n";
-                        out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;\n";
-                    }
-                    else if (derivOrder[0] == 1 && derivOrder[1] == 0) {
-                        out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;\n";
-                        out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;\n";
-                        out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;\n";
-                        out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;\n";
-                        out << nodeNames[j] << " *= " << paramsFloat[6] << ";\n";
-                    }
-                    else if (derivOrder[0] == 0 && derivOrder[1] == 1) {
-                        out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;\n";
-                        out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;\n";
-                        out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;\n";
-                        out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;\n";
-                        out << nodeNames[j] << " *= " << paramsFloat[7] << ";\n";
+                    int argIndex = -1;
+                    for (int k = 0; k < 6; k++) {
+                        if (derivOrder[k] > 0) {
+                            if (derivOrder[k] > 1 || argIndex != -1)
+                                throw OpenMMException("Unsupported derivative of periodicdistance"); // Should be impossible for this to happen.
+                            argIndex = k;
+                        }
                    }
-                    else
-                        throw OpenMMException("Unsupported derivative order for Continuous2DFunction");
+                    if (argIndex == -1)
+                        out << nodeNames[j] << " = RECIP(periodicDistance_rinv);\n";
+                    else if (argIndex == 0)
+                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.x*periodicDistance_rinv : 0);\n";
+                    else if (argIndex == 1)
+                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.y*periodicDistance_rinv : 0);\n";
+                    else if (argIndex == 2)
+                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? periodicDistance_delta.z*periodicDistance_rinv : 0);\n";
+                    else if (argIndex == 3)
+                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.x*periodicDistance_rinv : 0);\n";
+                    else if (argIndex == 4)
+                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.y*periodicDistance_rinv : 0);\n";
+                    else if (argIndex == 5)
+                        out << nodeNames[j] << " = (periodicDistance_r2 > 0 ? -periodicDistance_delta.z*periodicDistance_rinv : 0);\n";
                }
-                out << "}\n";
            }
-            else if (dynamic_cast<const Continuous3DFunction*>(functions[i]) != NULL) {
-                out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
-                out << "real y = " << getTempName(node.getChildren()[1], temps) << ";\n";
-                out << "real z = " << getTempName(node.getChildren()[2], temps) << ";\n";
-                out << "if (x >= " << paramsFloat[3] << " && x <= " << paramsFloat[4] << " && y >= " << paramsFloat[5] << " && y <= " << paramsFloat[6] << " && z >= " << paramsFloat[7] << " && z <= " << paramsFloat[8] << ") {\n";
-                out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n";
-                out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n";
-                out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n";
-                out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n";
-                out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n";
-                out << "int u = min((int) floor(z), " << paramsInt[2] << ");\n";
-                out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n";
-                out << "float4 c[16];\n";
-                for (int j = 0; j < 16; j++)
-                    out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
-                out << "real da = x-s;\n";
-                out << "real db = y-t;\n";
-                out << "real dc = z-u;\n";
-                for (int j = 0; j < nodes.size(); j++) {
-                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
-                    if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
-                        out << "real value[4] = {0, 0, 0, 0};\n";
-                        for (int k = 3; k >= 0; k--)
-                            for (int m = 0; m < 4; m++) {
-                                int base = k + 4*m;
-                                out << "value[" << m << "] = db*value[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
-                            }
-                        out << nodeNames[j] << " = value[0] + dc*(value[1] + dc*(value[2] + dc*value[3]));\n";
-                    }
-                    else if (derivOrder[0] == 1 && derivOrder[1] == 0 && derivOrder[2] == 0) {
-                        out << "real derivx[4] = {0, 0, 0, 0};\n";
-                        for (int k = 3; k >= 0; k--)
-                            for (int m = 0; m < 4; m++) {
-                                int base = k + 4*m;
-                                out << "derivx[" << m << "] = db*derivx[" << m << "] + (3*c[" << base << "].w*da + 2*c[" << base << "].z)*da + c[" << base << "].y;\n";
-                            }
-                        out << nodeNames[j] << " = derivx[0] + dc*(derivx[1] + dc*(derivx[2] + dc*derivx[3]));\n";
-                        out << nodeNames[j] << " *= " << paramsFloat[9] << ";\n";
+            else {
+                // This is a tabulated function.
+                
+                int i;
+                for (i = 0; i < (int) functionNames.size() && functionNames[i].first != node.getOperation().getName(); i++)
+                    ;
+                if (i == functionNames.size())
+                    throw OpenMMException("Unknown function in expression: "+node.getOperation().getName());
+                vector<string> paramsFloat, paramsInt;
+                for (int j = 0; j < (int) functionParams[i].size(); j++) {
+                    paramsFloat.push_back(context.doubleToString(functionParams[i][j]));
+                    paramsInt.push_back(context.intToString((int) functionParams[i][j]));
+                }
+                if (dynamic_cast<const Continuous1DFunction*>(functions[i]) != NULL) {
+                    out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
+                    out << "if (x >= " << paramsFloat[0] << " && x <= " << paramsFloat[1] << ") {\n";
+                    out << "x = (x - " << paramsFloat[0] << ")*" << paramsFloat[2] << ";\n";
+                    out << "int index = (int) (floor(x));\n";
+                    out << "index = min(index, (int) " << paramsInt[3] << ");\n";
+                    out << "float4 coeff = " << functionNames[i].second << "[index];\n";
+                    out << "real b = x-index;\n";
+                    out << "real a = 1.0f-b;\n";
+                    for (int j = 0; j < nodes.size(); j++) {
+                        const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
+                        if (derivOrder[0] == 0)
+                            out << nodeNames[j] << " = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(" << paramsFloat[2] << "*" << paramsFloat[2] << ");\n";
+                        else
+                            out << nodeNames[j] << " = (coeff.y-coeff.x)*" << paramsFloat[2] << "+((1.0f-3.0f*a*a)*coeff.z+(3.0f*b*b-1.0f)*coeff.w)/" << paramsFloat[2] << ";\n";
                    }
-                    else if (derivOrder[0] == 0 && derivOrder[1] == 1 && derivOrder[2] == 0) {
-                        const string suffixes[] = {".x", ".y", ".z", ".w"};
-                        out << "real derivy[4] = {0, 0, 0, 0};\n";
-                        for (int k = 3; k >= 0; k--)
-                            for (int m = 0; m < 4; m++) {
-                                int base = 4*m;
-                                string suffix = suffixes[m];
-                                out << "derivy[" << m << "] = da*derivy[" << m << "] + (3*c[" << (base+3) << "]" << suffix << "*db + 2*c[" << (base+2) << "]" << suffix << ")*db + c[" << (base+1) << "]" << suffix << ";\n";
-                            }
-                        out << nodeNames[j] << " = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));\n";
-                        out << nodeNames[j] << " *= " << paramsFloat[10] << ";\n";
+                    out << "}\n";
+                }
+                else if (dynamic_cast<const Continuous2DFunction*>(functions[i]) != NULL) {
+                    out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
+                    out << "real y = " << getTempName(node.getChildren()[1], temps) << ";\n";
+                    out << "if (x >= " << paramsFloat[2] << " && x <= " << paramsFloat[3] << " && y >= " << paramsFloat[4] << " && y <= " << paramsFloat[5] << ") {\n";
+                    out << "x = (x - " << paramsFloat[2] << ")*" << paramsFloat[6] << ";\n";
+                    out << "y = (y - " << paramsFloat[4] << ")*" << paramsFloat[7] << ";\n";
+                    out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n";
+                    out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n";
+                    out << "int coeffIndex = 4*(s+" << paramsInt[0] << "*t);\n";
+                    out << "float4 c[4];\n";
+                    for (int j = 0; j < 4; j++)
+                        out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
+                    out << "real da = x-s;\n";
+                    out << "real db = y-t;\n";
+                    for (int j = 0; j < nodes.size(); j++) {
+                        const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
+                        if (derivOrder[0] == 0 && derivOrder[1] == 0) {
+                            out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[3].w*db + c[3].z)*db + c[3].y)*db + c[3].x;\n";
+                            out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[2].w*db + c[2].z)*db + c[2].y)*db + c[2].x;\n";
+                            out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[1].w*db + c[1].z)*db + c[1].y)*db + c[1].x;\n";
+                            out << nodeNames[j] << " = da*" << nodeNames[j] << " + ((c[0].w*db + c[0].z)*db + c[0].y)*db + c[0].x;\n";
+                        }
+                        else if (derivOrder[0] == 1 && derivOrder[1] == 0) {
+                            out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].w*da + 2.0f*c[2].w)*da + c[1].w;\n";
+                            out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].z*da + 2.0f*c[2].z)*da + c[1].z;\n";
+                            out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].y*da + 2.0f*c[2].y)*da + c[1].y;\n";
+                            out << nodeNames[j] << " = db*" << nodeNames[j] << " + (3.0f*c[3].x*da + 2.0f*c[2].x)*da + c[1].x;\n";
+                            out << nodeNames[j] << " *= " << paramsFloat[6] << ";\n";
+                        }
+                        else if (derivOrder[0] == 0 && derivOrder[1] == 1) {
+                            out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[3].w*db + 2.0f*c[3].z)*db + c[3].y;\n";
+                            out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[2].w*db + 2.0f*c[2].z)*db + c[2].y;\n";
+                            out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[1].w*db + 2.0f*c[1].z)*db + c[1].y;\n";
+                            out << nodeNames[j] << " = da*" << nodeNames[j] << " + (3.0f*c[0].w*db + 2.0f*c[0].z)*db + c[0].y;\n";
+                            out << nodeNames[j] << " *= " << paramsFloat[7] << ";\n";
+                        }
+                        else
+                            throw OpenMMException("Unsupported derivative order for Continuous2DFunction");
                    }
-                    else if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 1) {
-                        out << "real derivz[4] = {0, 0, 0, 0};\n";
-                        for (int k = 3; k >= 0; k--)
-                            for (int m = 0; m < 4; m++) {
-                                int base = k + 4*m;
-                                out << "derivz[" << m << "] = db*derivz[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
-                            }
-                        out << nodeNames[j] << " = derivz[1] + dc*(2*derivz[2] + dc*3*derivz[3]);\n";
-                        out << nodeNames[j] << " *= " << paramsFloat[11] << ";\n";
+                    out << "}\n";
+                }
+                else if (dynamic_cast<const Continuous3DFunction*>(functions[i]) != NULL) {
+                    out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
+                    out << "real y = " << getTempName(node.getChildren()[1], temps) << ";\n";
+                    out << "real z = " << getTempName(node.getChildren()[2], temps) << ";\n";
+                    out << "if (x >= " << paramsFloat[3] << " && x <= " << paramsFloat[4] << " && y >= " << paramsFloat[5] << " && y <= " << paramsFloat[6] << " && z >= " << paramsFloat[7] << " && z <= " << paramsFloat[8] << ") {\n";
+                    out << "x = (x - " << paramsFloat[3] << ")*" << paramsFloat[9] << ";\n";
+                    out << "y = (y - " << paramsFloat[5] << ")*" << paramsFloat[10] << ";\n";
+                    out << "z = (z - " << paramsFloat[7] << ")*" << paramsFloat[11] << ";\n";
+                    out << "int s = min((int) floor(x), " << paramsInt[0] << ");\n";
+                    out << "int t = min((int) floor(y), " << paramsInt[1] << ");\n";
+                    out << "int u = min((int) floor(z), " << paramsInt[2] << ");\n";
+                    out << "int coeffIndex = 16*(s+" << paramsInt[0] << "*(t+" << paramsInt[1] << "*u));\n";
+                    out << "float4 c[16];\n";
+                    for (int j = 0; j < 16; j++)
+                        out << "c[" << j << "] = " << functionNames[i].second << "[coeffIndex+" << j << "];\n";
+                    out << "real da = x-s;\n";
+                    out << "real db = y-t;\n";
+                    out << "real dc = z-u;\n";
+                    for (int j = 0; j < nodes.size(); j++) {
+                        const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
+                        if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
+                            out << "real value[4] = {0, 0, 0, 0};\n";
+                            for (int k = 3; k >= 0; k--)
+                                for (int m = 0; m < 4; m++) {
+                                    int base = k + 4*m;
+                                    out << "value[" << m << "] = db*value[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
+                                }
+                            out << nodeNames[j] << " = value[0] + dc*(value[1] + dc*(value[2] + dc*value[3]));\n";
+                        }
+                        else if (derivOrder[0] == 1 && derivOrder[1] == 0 && derivOrder[2] == 0) {
+                            out << "real derivx[4] = {0, 0, 0, 0};\n";
+                            for (int k = 3; k >= 0; k--)
+                                for (int m = 0; m < 4; m++) {
+                                    int base = k + 4*m;
+                                    out << "derivx[" << m << "] = db*derivx[" << m << "] + (3*c[" << base << "].w*da + 2*c[" << base << "].z)*da + c[" << base << "].y;\n";
+                                }
+                            out << nodeNames[j] << " = derivx[0] + dc*(derivx[1] + dc*(derivx[2] + dc*derivx[3]));\n";
+                            out << nodeNames[j] << " *= " << paramsFloat[9] << ";\n";
+                        }
+                        else if (derivOrder[0] == 0 && derivOrder[1] == 1 && derivOrder[2] == 0) {
+                            const string suffixes[] = {".x", ".y", ".z", ".w"};
+                            out << "real derivy[4] = {0, 0, 0, 0};\n";
+                            for (int k = 3; k >= 0; k--)
+                                for (int m = 0; m < 4; m++) {
+                                    int base = 4*m;
+                                    string suffix = suffixes[m];
+                                    out << "derivy[" << m << "] = da*derivy[" << m << "] + (3*c[" << (base+3) << "]" << suffix << "*db + 2*c[" << (base+2) << "]" << suffix << ")*db + c[" << (base+1) << "]" << suffix << ";\n";
+                                }
+                            out << nodeNames[j] << " = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));\n";
+                            out << nodeNames[j] << " *= " << paramsFloat[10] << ";\n";
+                        }
+                        else if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 1) {
+                            out << "real derivz[4] = {0, 0, 0, 0};\n";
+                            for (int k = 3; k >= 0; k--)
+                                for (int m = 0; m < 4; m++) {
+                                    int base = k + 4*m;
+                                    out << "derivz[" << m << "] = db*derivz[" << m << "] + ((c[" << base << "].w*da + c[" << base << "].z)*da + c[" << base << "].y)*da + c[" << base << "].x;\n";
+                                }
+                            out << nodeNames[j] << " = derivz[1] + dc*(2*derivz[2] + dc*3*derivz[3]);\n";
+                            out << nodeNames[j] << " *= " << paramsFloat[11] << ";\n";
+                        }
+                        else
+                            throw OpenMMException("Unsupported derivative order for Continuous2DFunction");
                    }
-                    else
-                        throw OpenMMException("Unsupported derivative order for Continuous2DFunction");
+                    out << "}\n";
                }
-                out << "}\n";
-            }
-            else if (dynamic_cast<const Discrete1DFunction*>(functions[i]) != NULL) {
-                for (int j = 0; j < nodes.size(); j++) {
-                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
-                    if (derivOrder[0] == 0) {
-                        out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
-                        out << "if (x >= 0 && x < " << paramsInt[0] << ") {\n";
-                        out << "int index = (int) floor(x+0.5f);\n";
-                        out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n";
-                        out << "}\n";
+                else if (dynamic_cast<const Discrete1DFunction*>(functions[i]) != NULL) {
+                    for (int j = 0; j < nodes.size(); j++) {
+                        const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
+                        if (derivOrder[0] == 0) {
+                            out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
+                            out << "if (x >= 0 && x < " << paramsInt[0] << ") {\n";
+                            out << "int index = (int) floor(x+0.5f);\n";
+                            out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n";
+                            out << "}\n";
+                        }
                    }
                }
-            }
-            else if (dynamic_cast<const Discrete2DFunction*>(functions[i]) != NULL) {
-                for (int j = 0; j < nodes.size(); j++) {
-                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
-                    if (derivOrder[0] == 0 && derivOrder[1] == 0) {
-                        out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << "+0.5f);\n";
-                        out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << "+0.5f);\n";
-                        out << "int xsize = (int) " << paramsInt[0] << ";\n";
-                        out << "int ysize = (int) " << paramsInt[1] << ";\n";
-                        out << "int index = x+y*xsize;\n";
-                        out << "if (index >= 0 && index < xsize*ysize)\n";
-                        out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n";
+                else if (dynamic_cast<const Discrete2DFunction*>(functions[i]) != NULL) {
+                    for (int j = 0; j < nodes.size(); j++) {
+                        const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
+                        if (derivOrder[0] == 0 && derivOrder[1] == 0) {
+                            out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << "+0.5f);\n";
+                            out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << "+0.5f);\n";
+                            out << "int xsize = (int) " << paramsInt[0] << ";\n";
+                            out << "int ysize = (int) " << paramsInt[1] << ";\n";
+                            out << "int index = x+y*xsize;\n";
+                            out << "if (index >= 0 && index < xsize*ysize)\n";
+                            out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n";
+                        }
                    }
                }
-            }
-            else if (dynamic_cast<const Discrete3DFunction*>(functions[i]) != NULL) {
-                for (int j = 0; j < nodes.size(); j++) {
-                    const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
-                    if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
-                        out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << "+0.5f);\n";
-                        out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << "+0.5f);\n";
-                        out << "int z = (int) floor(" << getTempName(node.getChildren()[2], temps) << "+0.5f);\n";
-                        out << "int xsize = (int) " << paramsInt[0] << ";\n";
-                        out << "int ysize = (int) " << paramsInt[1] << ";\n";
-                        out << "int zsize = (int) " << paramsInt[2] << ";\n";
-                        out << "int index = x+(y+z*ysize)*xsize;\n";
-                        out << "if (index >= 0 && index < xsize*ysize*zsize)\n";
-                        out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n";
+                else if (dynamic_cast<const Discrete3DFunction*>(functions[i]) != NULL) {
+                    for (int j = 0; j < nodes.size(); j++) {
+                        const vector<int>& derivOrder = dynamic_cast<const Operation::Custom*>(&nodes[j]->getOperation())->getDerivOrder();
+                        if (derivOrder[0] == 0 && derivOrder[1] == 0 && derivOrder[2] == 0) {
+                            out << "int x = (int) floor(" << getTempName(node.getChildren()[0], temps) << "+0.5f);\n";
+                            out << "int y = (int) floor(" << getTempName(node.getChildren()[1], temps) << "+0.5f);\n";
+                            out << "int z = (int) floor(" << getTempName(node.getChildren()[2], temps) << "+0.5f);\n";
+                            out << "int xsize = (int) " << paramsInt[0] << ";\n";
+                            out << "int ysize = (int) " << paramsInt[1] << ";\n";
+                            out << "int zsize = (int) " << paramsInt[2] << ";\n";
+                            out << "int index = x+(y+z*ysize)*xsize;\n";
+                            out << "if (index >= 0 && index < xsize*ysize*zsize)\n";
+                            out << nodeNames[j] << " = " << functionNames[i].second << "[index];\n";
+                        }
                    }
                }
            }
@@ -483,7 +526,7 @@ string CudaExpressionUtilities::getTempName(const ExpressionTreeNode& node, cons
    throw OpenMMException(out.str());
 }

-void CudaExpressionUtilities::findRelatedTabulatedFunctions(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode,
+void CudaExpressionUtilities::findRelatedCustomFunctions(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode,
            vector<const Lepton::ExpressionTreeNode*>& nodes) {
    if (searchNode.getOperation().getId() == Operation::CUSTOM && node.getOperation().getName() == searchNode.getOperation().getName()) {
        // Make sure the arguments are identical.
@@ -504,7 +547,7 @@ void CudaExpressionUtilities::findRelatedTabulatedFunctions(const ExpressionTree
    }
    else
        for (int i = 0; i < (int) searchNode.getChildren().size(); i++)
-            findRelatedTabulatedFunctions(node, searchNode.getChildren()[i], nodes);
+            findRelatedCustomFunctions(node, searchNode.getChildren()[i], nodes);
 }

 void CudaExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode, map<int, const ExpressionTreeNode*>& powers) {
@@ -730,3 +773,7 @@ Lepton::CustomFunction* CudaExpressionUtilities::getFunctionPlaceholder(const Ta
        return &fp3;
    throw OpenMMException("getFunctionPlaceholder: Unknown function type");
 }
+
+Lepton::CustomFunction* CudaExpressionUtilities::getPeriodicDistancePlaceholder() { // Accessor: returns a Lepton::CustomFunction* that points at periodicDistance.
+    return &periodicDistance; // Address of an object declared outside this function; presumably a placeholder for a periodic-distance function in parsed expressions (TODO confirm against the header).
+}
--- a/platforms/cuda/src/CudaIntegrationUtilities.cpp
+++ b/platforms/cuda/src/CudaIntegrationUtilities.cpp
@@ -201,15 +201,17 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
                params.push_back(make_float2(dist13, dist12));
            }
            else
-                throw OpenMMException("Two of the three distances constrained with SETTLE must be the same.");
+                continue; // We can't handle this with SETTLE
            isShakeAtom[atom1] = true;
            isShakeAtom[atom2] = true;
            isShakeAtom[atom3] = true;
        }
-        settleAtoms = CudaArray::create<int4>(context, atoms.size(), "settleAtoms");
-        settleParams = CudaArray::create<float2>(context, params.size(), "settleParams");
-        settleAtoms->upload(atoms);
-        settleParams->upload(params);
+        if (atoms.size() > 0) {
+            settleAtoms = CudaArray::create<int4>(context, atoms.size(), "settleAtoms");
+            settleParams = CudaArray::create<float2>(context, params.size(), "settleParams");
+            settleAtoms->upload(atoms);
+            settleParams->upload(params);
+        }
    }

    // Find clusters consisting of a central atom with up to three peripheral atoms.

--- a/platforms/cuda/src/CudaKernelFactory.cpp
+++ b/platforms/cuda/src/CudaKernelFactory.cpp
@@ -104,6 +104,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
        return new CudaCalcCustomExternalForceKernel(name, platform, cu, context.getSystem());
    if (name == CalcCustomHbondForceKernel::Name())
        return new CudaCalcCustomHbondForceKernel(name, platform, cu, context.getSystem());
+    if (name == CalcCustomCentroidBondForceKernel::Name())
+        return new CudaCalcCustomCentroidBondForceKernel(name, platform, cu, context.getSystem());
    if (name == CalcCustomCompoundBondForceKernel::Name())
        return new CudaCalcCustomCompoundBondForceKernel(name, platform, cu, context.getSystem());
    if (name == CalcCustomManyParticleForceKernel::Name())