Merge branch 'master' of github.com:SimTk/openmm

c83f44dd · Lee-Ping · e9021971 · 9202aec4 · c83f44dd · c83f44dd
Commit c83f44dd authored May 09, 2014 by Lee-Ping
20 changed files
--- a/libraries/validate/src/ValidateOpenMMForces.cpp
+++ b/libraries/validate/src/ValidateOpenMMForces.cpp
@@ -552,7 +552,7 @@ ForceValidationResult* ValidateOpenMMForces::compareForce(Context& context, std:
        if( forceName.compare( "NA" ) == 0 ){
            std::stringstream message;
            message << "Force at index=" << ii << " not found -- aborting!";
-            std::cerr << message << std::endl;
+            std::cerr << message.str() << std::endl;
            throw OpenMM::OpenMMException(message.str());
        }
        systemForceNameMap[forceName] = ii;

--- a/openmmapi/include/openmm/internal/MSVC_erfc.h
+++ b/openmmapi/include/openmm/internal/MSVC_erfc.h
@@ -9,7 +9,9 @@
 */
 #if defined(_MSC_VER) 
+#ifndef M_PI
 #define M_PI 3.14159265358979323846264338327950288
+#endif
 #if _MSC_VER <= 1700 // 1700 is VC11, 1800 is VC12 
 /***************************

--- a/openmmapi/src/CustomNonbondedForceImpl.cpp
+++ b/openmmapi/src/CustomNonbondedForceImpl.cpp
@@ -43,6 +43,7 @@
 #include <cmath>
 #include <sstream>
 #include <utility>
+#include <algorithm>
 using namespace OpenMM;
 using namespace std;

--- a/openmmapi/src/MonteCarloAnisotropicBarostatImpl.cpp
+++ b/openmmapi/src/MonteCarloAnisotropicBarostatImpl.cpp
@@ -35,6 +35,7 @@
 #include "openmm/kernels.h"
 #include <cmath>
 #include <vector>
+#include <algorithm>
 using namespace OpenMM;
 using namespace OpenMM_SFMT;

--- a/openmmapi/src/MonteCarloBarostatImpl.cpp
+++ b/openmmapi/src/MonteCarloBarostatImpl.cpp
@@ -35,6 +35,7 @@
 #include "openmm/kernels.h"
 #include <cmath>
 #include <vector>
+#include <algorithm>
 using namespace OpenMM;
 using namespace OpenMM_SFMT;

--- a/openmmapi/src/NonbondedForceImpl.cpp
+++ b/openmmapi/src/NonbondedForceImpl.cpp
@@ -39,6 +39,7 @@
 #include <cmath>
 #include <map>
 #include <sstream>
+#include <algorithm>
 using namespace OpenMM;
 using namespace std;

--- a/platforms/cpu/tests/TestCpuNeighborList.cpp
+++ b/platforms/cpu/tests/TestCpuNeighborList.cpp
@@ -43,6 +43,7 @@
 #include <set>
 #include <utility>
 #include <vector>
+#include <algorithm>
 using namespace OpenMM;
 using namespace std;

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -1520,7 +1520,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
    else
        dispersionCoefficient = 0.0;
    alpha = 0;
-    if (force.getNonbondedMethod() == NonbondedForce::Ewald && cu.getContextIndex() == 0) {
+    if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
        // Compute the Ewald parameters.
        int kmaxx, kmaxy, kmaxz;
@@ -1528,26 +1528,28 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
        defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
        defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
        defines["USE_EWALD"] = "1";
-        ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
+        if (cu.getContextIndex() == 0) {
+            ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
-        // Create the reciprocal space kernels.
+            // Create the reciprocal space kernels.
-        map<string, string> replacements;
-        replacements["NUM_ATOMS"] = cu.intToString(numParticles);
+            map<string, string> replacements;
-        replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
+            replacements["NUM_ATOMS"] = cu.intToString(numParticles);
-        replacements["KMAX_X"] = cu.intToString(kmaxx);
+            replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
-        replacements["KMAX_Y"] = cu.intToString(kmaxy);
+            replacements["KMAX_X"] = cu.intToString(kmaxx);
-        replacements["KMAX_Z"] = cu.intToString(kmaxz);
+            replacements["KMAX_Y"] = cu.intToString(kmaxy);
-        replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
+            replacements["KMAX_Z"] = cu.intToString(kmaxz);
-        replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
+            replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
-        replacements["M_PI"] = cu.doubleToString(M_PI);
+            replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
-        CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements);
+            replacements["M_PI"] = cu.doubleToString(M_PI);
-        ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
+            CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements);
-        ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
+            ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
-        int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
+            ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
-        cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
+            int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
-    }
+            cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
-    else if (force.getNonbondedMethod() == NonbondedForce::PME && cu.getContextIndex() == 0) {
+        }
+    }
+    else if (force.getNonbondedMethod() == NonbondedForce::PME) {
        // Compute the PME parameters.
        int gridSizeX, gridSizeY, gridSizeZ;
@@ -1560,140 +1562,142 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
        defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
        defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
        defines["USE_EWALD"] = "1";
-        ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
+        if (cu.getContextIndex() == 0) {
-        pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
+            ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
-        pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
+            pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
-        pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
+            pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
-        pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
+            pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
-        pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
+            pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
-        pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
+            pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
-        pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
+            pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
-        pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
+            pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
-        pmeDefines["M_PI"] = cu.doubleToString(M_PI);
+            pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
-        if (cu.getUseDoublePrecision())
+            pmeDefines["M_PI"] = cu.doubleToString(M_PI);
-            pmeDefines["USE_DOUBLE_PRECISION"] = "1";
+            if (cu.getUseDoublePrecision())
-        CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
+                pmeDefines["USE_DOUBLE_PRECISION"] = "1";
-        if (cu.getPlatformData().useCpuPme) {
+            CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
-            // Create the CPU PME kernel.
+            if (cu.getPlatformData().useCpuPme) {
+                // Create the CPU PME kernel.
-            try {
-                cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
+                try {
-                cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
+                    cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
-                CUfunction addForcesKernel = cu.getKernel(module, "addForces");
+                    cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
-                pmeio = new PmeIO(cu, addForcesKernel);
+                    CUfunction addForcesKernel = cu.getKernel(module, "addForces");
-                cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
+                    pmeio = new PmeIO(cu, addForcesKernel);
-                cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
+                    cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
-            }
+                    cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
-            catch (OpenMMException& ex) {
+                }
-                // The CPU PME plugin isn't available.
+                catch (OpenMMException& ex) {
-            }
+                    // The CPU PME plugin isn't available.
-        }
+                }
-        if (pmeio == NULL) {
-            pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
-            pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
-            pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
-            pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
-            pmeEvalEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
-            pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
-            cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
-            cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
-            // Create required data structures.
-            int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
-            directPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, cu.getComputeCapability() >= 2.0 ? elementSize : sizeof(long long), "originalPmeGrid");
-            reciprocalPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*(gridSizeZ/2+1), 2*elementSize, "reciprocalPmeGrid");
-            cu.addAutoclearBuffer(*directPmeGrid);
-            pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
-            pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
-            pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
-            pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
-            pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
-            sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
-            cufftResult result = cufftPlan3d(&fftForward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
-            if (result != CUFFT_SUCCESS)
-                throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
-            result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
-            if (result != CUFFT_SUCCESS)
-                throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
-            cufftSetCompatibilityMode(fftForward, CUFFT_COMPATIBILITY_NATIVE);
-            cufftSetCompatibilityMode(fftBackward, CUFFT_COMPATIBILITY_NATIVE);
-            hasInitializedFFT = true;
-            // Initialize the b-spline moduli.
-            int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
-            vector<double> data(PmeOrder);
-            vector<double> ddata(PmeOrder);
-            vector<double> bsplines_data(maxSize);
-            data[PmeOrder-1] = 0.0;
-            data[1] = 0.0;
-            data[0] = 1.0;
-            for (int i = 3; i < PmeOrder; i++) {
-                double div = 1.0/(i-1.0);
-                data[i-1] = 0.0;
-                for (int j = 1; j < (i-1); j++)
-                    data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
-                data[0] = div*data[0];
            }
+            if (pmeio == NULL) {
+                pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
+                pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
+                pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
+                pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
+                pmeEvalEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
+                pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
+                cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
+                cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
+                // Create required data structures.
+                int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
+                directPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, cu.getComputeCapability() >= 2.0 ? elementSize : sizeof(long long), "originalPmeGrid");
+                reciprocalPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*(gridSizeZ/2+1), 2*elementSize, "reciprocalPmeGrid");
+                cu.addAutoclearBuffer(*directPmeGrid);
+                pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
+                pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
+                pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
+                pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
+                pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
+                sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
+                cufftResult result = cufftPlan3d(&fftForward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
+                if (result != CUFFT_SUCCESS)
+                    throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
+                result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
+                if (result != CUFFT_SUCCESS)
+                    throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
+                cufftSetCompatibilityMode(fftForward, CUFFT_COMPATIBILITY_NATIVE);
+                cufftSetCompatibilityMode(fftBackward, CUFFT_COMPATIBILITY_NATIVE);
+                hasInitializedFFT = true;
+                // Initialize the b-spline moduli.
+                int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
+                vector<double> data(PmeOrder);
+                vector<double> ddata(PmeOrder);
+                vector<double> bsplines_data(maxSize);
+                data[PmeOrder-1] = 0.0;
+                data[1] = 0.0;
+                data[0] = 1.0;
+                for (int i = 3; i < PmeOrder; i++) {
+                    double div = 1.0/(i-1.0);
+                    data[i-1] = 0.0;
+                    for (int j = 1; j < (i-1); j++)
+                        data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
+                    data[0] = div*data[0];
+                }
+                // Differentiate.
-            // Differentiate.
+                ddata[0] = -data[0];
+                for (int i = 1; i < PmeOrder; i++)
-            ddata[0] = -data[0];
+                    ddata[i] = data[i-1]-data[i];
-            for (int i = 1; i < PmeOrder; i++)
+                double div = 1.0/(PmeOrder-1);
-                ddata[i] = data[i-1]-data[i];
+                data[PmeOrder-1] = 0.0;
-            double div = 1.0/(PmeOrder-1);
+                for (int i = 1; i < (PmeOrder-1); i++)
-            data[PmeOrder-1] = 0.0;
+                    data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
-            for (int i = 1; i < (PmeOrder-1); i++)
+                data[0] = div*data[0];
-                data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
+                for (int i = 0; i < maxSize; i++)
-            data[0] = div*data[0];
+                    bsplines_data[i] = 0.0;
-            for (int i = 0; i < maxSize; i++)
+                for (int i = 1; i <= PmeOrder; i++)
-                bsplines_data[i] = 0.0;
+                    bsplines_data[i] = data[i-1];
-            for (int i = 1; i <= PmeOrder; i++)
-                bsplines_data[i] = data[i-1];
+                // Evaluate the actual bspline moduli for X/Y/Z.
-            // Evaluate the actual bspline moduli for X/Y/Z.
+                for(int dim = 0; dim < 3; dim++) {
+                    int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
-            for(int dim = 0; dim < 3; dim++) {
+                    vector<double> moduli(ndata);
-                int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
+                    for (int i = 0; i < ndata; i++) {
-                vector<double> moduli(ndata);
+                        double sc = 0.0;
-                for (int i = 0; i < ndata; i++) {
+                        double ss = 0.0;
-                    double sc = 0.0;
+                        for (int j = 0; j < ndata; j++) {
-                    double ss = 0.0;
+                            double arg = (2.0*M_PI*i*j)/ndata;
-                    for (int j = 0; j < ndata; j++) {
+                            sc += bsplines_data[j]*cos(arg);
-                        double arg = (2.0*M_PI*i*j)/ndata;
+                            ss += bsplines_data[j]*sin(arg);
-                        sc += bsplines_data[j]*cos(arg);
+                        }
-                        ss += bsplines_data[j]*sin(arg);
+                        moduli[i] = sc*sc+ss*ss;
                    }
-                    moduli[i] = sc*sc+ss*ss;
-                }
-                for (int i = 0; i < ndata; i++)
-                    if (moduli[i] < 1.0e-7)
-                        moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
-                if (cu.getUseDoublePrecision()) {
-                    if (dim == 0)
-                        pmeBsplineModuliX->upload(moduli);
-                    else if (dim == 1)
-                        pmeBsplineModuliY->upload(moduli);
-                    else
-                        pmeBsplineModuliZ->upload(moduli);
-                }
-                else {
-                    vector<float> modulif(ndata);
                    for (int i = 0; i < ndata; i++)
-                        modulif[i] = (float) moduli[i];
+                        if (moduli[i] < 1.0e-7)
-                    if (dim == 0)
+                            moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
-                        pmeBsplineModuliX->upload(modulif);
+                    if (cu.getUseDoublePrecision()) {
-                    else if (dim == 1)
+                        if (dim == 0)
-                        pmeBsplineModuliY->upload(modulif);
+                            pmeBsplineModuliX->upload(moduli);
-                    else
+                        else if (dim == 1)
-                        pmeBsplineModuliZ->upload(modulif);
+                            pmeBsplineModuliY->upload(moduli);
+                        else
+                            pmeBsplineModuliZ->upload(moduli);
+                    }
+                    else {
+                        vector<float> modulif(ndata);
+                        for (int i = 0; i < ndata; i++)
+                            modulif[i] = (float) moduli[i];
+                        if (dim == 0)
+                            pmeBsplineModuliX->upload(modulif);
+                        else if (dim == 1)
+                            pmeBsplineModuliY->upload(modulif);
+                        else
+                            pmeBsplineModuliZ->upload(modulif);
+                    }
                }
            }
        }

--- a/platforms/cuda/tests/TestCudaLocalEnergyMinimizer.cpp
+++ b/platforms/cuda/tests/TestCudaLocalEnergyMinimizer.cpp
@@ -7,7 +7,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2010-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2010-2014 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -170,6 +170,7 @@ void testVirtualSites() {
    VerletIntegrator integrator(0.01);
    Context context(system, integrator, platform);
    context.setPositions(positions);
+    context.applyConstraints(1e-5);
    State initialState = context.getState(State::Forces | State::Energy);
    LocalEnergyMinimizer::minimize(context, tolerance);
    State finalState = context.getState(State::Forces | State::Energy | State::Positions);

--- a/platforms/cuda/tests/TestCudaNonbondedForce.cpp
+++ b/platforms/cuda/tests/TestCudaNonbondedForce.cpp
@@ -748,7 +748,7 @@ void testChangingParameters() {
    ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
 }
-void testParallelComputation(bool useCutoff) {
+void testParallelComputation(NonbondedForce::NonbondedMethod method) {
    System system;
    const int numParticles = 200;
    for (int i = 0; i < numParticles; i++)
@@ -756,9 +756,9 @@ void testParallelComputation(bool useCutoff) {
    NonbondedForce* force = new NonbondedForce();
    for (int i = 0; i < numParticles; i++)
        force->addParticle(i%2-0.5, 0.5, 1.0);
-    if (useCutoff)
+    force->setNonbondedMethod(method);
-        force->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
    system.addForce(force);
+    system.setDefaultPeriodicBoxVectors(Vec3(5,0,0), Vec3(0,5,0), Vec3(0,0,5));
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    vector<Vec3> positions(numParticles);
@@ -877,8 +877,9 @@ int main(int argc, char* argv[]) {
        //testBlockInteractions(true);
        testDispersionCorrection();
        testChangingParameters();
-        testParallelComputation(false);
+        testParallelComputation(NonbondedForce::NoCutoff);
-        testParallelComputation(true);
+        testParallelComputation(NonbondedForce::Ewald);
+        testParallelComputation(NonbondedForce::PME);
        testSwitchingFunction(NonbondedForce::CutoffNonPeriodic);
        testSwitchingFunction(NonbondedForce::PME);
    }

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -1492,7 +1492,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
    else
        dispersionCoefficient = 0.0;
    alpha = 0;
-    if (force.getNonbondedMethod() == NonbondedForce::Ewald && cl.getContextIndex() == 0) {
+    if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
        // Compute the Ewald parameters.
        int kmaxx, kmaxy, kmaxz;
@@ -1500,23 +1500,25 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
        defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
        defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
        defines["USE_EWALD"] = "1";
-        ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
+        if (cl.getContextIndex() == 0) {
+            ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
-        // Create the reciprocal space kernels.
+            // Create the reciprocal space kernels.
-        map<string, string> replacements;
+            map<string, string> replacements;
-        replacements["NUM_ATOMS"] = cl.intToString(numParticles);
+            replacements["NUM_ATOMS"] = cl.intToString(numParticles);
-        replacements["KMAX_X"] = cl.intToString(kmaxx);
+            replacements["KMAX_X"] = cl.intToString(kmaxx);
-        replacements["KMAX_Y"] = cl.intToString(kmaxy);
+            replacements["KMAX_Y"] = cl.intToString(kmaxy);
-        replacements["KMAX_Z"] = cl.intToString(kmaxz);
+            replacements["KMAX_Z"] = cl.intToString(kmaxz);
-        replacements["EXP_COEFFICIENT"] = cl.doubleToString(-1.0/(4.0*alpha*alpha));
+            replacements["EXP_COEFFICIENT"] = cl.doubleToString(-1.0/(4.0*alpha*alpha));
-        cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
+            cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
-        ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
+            ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
-        ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
+            ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
-        int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
+            int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
-        cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
+            cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
-    }
+        }
-    else if (force.getNonbondedMethod() == NonbondedForce::PME && cl.getContextIndex() == 0) {
+    }
+    else if (force.getNonbondedMethod() == NonbondedForce::PME) {
        // Compute the PME parameters.
        int gridSizeX, gridSizeY, gridSizeZ;
@@ -1527,119 +1529,121 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
        defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
        defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
        defines["USE_EWALD"] = "1";
-        ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
+        if (cl.getContextIndex() == 0) {
-        pmeDefines["PME_ORDER"] = cl.intToString(PmeOrder);
+            ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
-        pmeDefines["NUM_ATOMS"] = cl.intToString(numParticles);
+            pmeDefines["PME_ORDER"] = cl.intToString(PmeOrder);
-        pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(alpha*alpha));
+            pmeDefines["NUM_ATOMS"] = cl.intToString(numParticles);
-        pmeDefines["GRID_SIZE_X"] = cl.intToString(gridSizeX);
+            pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(alpha*alpha));
-        pmeDefines["GRID_SIZE_Y"] = cl.intToString(gridSizeY);
+            pmeDefines["GRID_SIZE_X"] = cl.intToString(gridSizeX);
-        pmeDefines["GRID_SIZE_Z"] = cl.intToString(gridSizeZ);
+            pmeDefines["GRID_SIZE_Y"] = cl.intToString(gridSizeY);
-        pmeDefines["EPSILON_FACTOR"] = cl.doubleToString(sqrt(ONE_4PI_EPS0));
+            pmeDefines["GRID_SIZE_Z"] = cl.intToString(gridSizeZ);
-        bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
+            pmeDefines["EPSILON_FACTOR"] = cl.doubleToString(sqrt(ONE_4PI_EPS0));
-        if (deviceIsCpu)
+            bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
-            pmeDefines["DEVICE_IS_CPU"] = "1";
+            if (deviceIsCpu)
-        if (cl.getPlatformData().useCpuPme) {
+                pmeDefines["DEVICE_IS_CPU"] = "1";
-            // Create the CPU PME kernel.
+            if (cl.getPlatformData().useCpuPme) {
+                // Create the CPU PME kernel.
-            try {
-                cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
+                try {
-                cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
+                    cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
-                cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
+                    cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
-                cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
+                    cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
-                pmeio = new PmeIO(cl, addForcesKernel);
+                    cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
-                cl.addPreComputation(new PmePreComputation(cl, cpuPme, *pmeio));
+                    pmeio = new PmeIO(cl, addForcesKernel);
-                cl.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
+                    cl.addPreComputation(new PmePreComputation(cl, cpuPme, *pmeio));
-            }
+                    cl.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
-            catch (OpenMMException& ex) {
-                // The CPU PME plugin isn't available.
-            }
-        }
-        if (pmeio == NULL) {
-            // Create required data structures.
-            int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
-            pmeGrid = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
-            cl.addAutoclearBuffer(*pmeGrid);
-            pmeGrid2 = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid2");
-            pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
-            pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
-            pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
-            pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
-            pmeAtomRange = OpenCLArray::create<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
-            pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
-            sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
-            fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ);
-            // Initialize the b-spline moduli.
-            int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
-            vector<double> data(PmeOrder);
-            vector<double> ddata(PmeOrder);
-            vector<double> bsplines_data(maxSize);
-            data[PmeOrder-1] = 0.0;
-            data[1] = 0.0;
-            data[0] = 1.0;
-            for (int i = 3; i < PmeOrder; i++) {
-                double div = 1.0/(i-1.0);
-                data[i-1] = 0.0;
-                for (int j = 1; j < (i-1); j++)
-                    data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
-                data[0] = div*data[0];
-            }
-            // Differentiate.
-            ddata[0] = -data[0];
-            for (int i = 1; i < PmeOrder; i++)
-                ddata[i] = data[i-1]-data[i];
-            double div = 1.0/(PmeOrder-1);
-            data[PmeOrder-1] = 0.0;
-            for (int i = 1; i < (PmeOrder-1); i++)
-                data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
-            data[0] = div*data[0];
-            for (int i = 0; i < maxSize; i++)
-                bsplines_data[i] = 0.0;
-            for (int i = 1; i <= PmeOrder; i++)
-                bsplines_data[i] = data[i-1];
-            // Evaluate the actual bspline moduli for X/Y/Z.
-            for(int dim = 0; dim < 3; dim++) {
-                int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
-                vector<cl_double> moduli(ndata);
-                for (int i = 0; i < ndata; i++) {
-                    double sc = 0.0;
-                    double ss = 0.0;
-                    for (int j = 0; j < ndata; j++) {
-                        double arg = (2.0*M_PI*i*j)/ndata;
-                        sc += bsplines_data[j]*cos(arg);
-                        ss += bsplines_data[j]*sin(arg);
-                    }
-                    moduli[i] = (float) (sc*sc+ss*ss);
                }
-                for (int i = 0; i < ndata; i++)
+                catch (OpenMMException& ex) {
-                {
+                    // The CPU PME plugin isn't available.
-                    if (moduli[i] < 1.0e-7)
-                        moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
                }
-                if (cl.getUseDoublePrecision()) {
+            }
-                    if (dim == 0)
+            if (pmeio == NULL) {
-                        pmeBsplineModuliX->upload(moduli);
+                // Create required data structures.
-                    else if (dim == 1)
-                        pmeBsplineModuliY->upload(moduli);
+                int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
-                    else
+                pmeGrid = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
-                        pmeBsplineModuliZ->upload(moduli);
+                cl.addAutoclearBuffer(*pmeGrid);
+                pmeGrid2 = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid2");
+                pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
+                pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
+                pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
+                pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
+                pmeAtomRange = OpenCLArray::create<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
+                pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
+                sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
+                fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ);
+                // Initialize the b-spline moduli.
+                int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
+                vector<double> data(PmeOrder);
+                vector<double> ddata(PmeOrder);
+                vector<double> bsplines_data(maxSize);
+                data[PmeOrder-1] = 0.0;
+                data[1] = 0.0;
+                data[0] = 1.0;
+                for (int i = 3; i < PmeOrder; i++) {
+                    double div = 1.0/(i-1.0);
+                    data[i-1] = 0.0;
+                    for (int j = 1; j < (i-1); j++)
+                        data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
+                    data[0] = div*data[0];
                }
-                else {
-                    vector<float> modulif(ndata);
+                // Differentiate.
+                ddata[0] = -data[0];
+                for (int i = 1; i < PmeOrder; i++)
+                    ddata[i] = data[i-1]-data[i];
+                double div = 1.0/(PmeOrder-1);
+                data[PmeOrder-1] = 0.0;
+                for (int i = 1; i < (PmeOrder-1); i++)
+                    data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
+                data[0] = div*data[0];
+                for (int i = 0; i < maxSize; i++)
+                    bsplines_data[i] = 0.0;
+                for (int i = 1; i <= PmeOrder; i++)
+                    bsplines_data[i] = data[i-1];
+                // Evaluate the actual bspline moduli for X/Y/Z.
+                for(int dim = 0; dim < 3; dim++) {
+                    int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
+                    vector<cl_double> moduli(ndata);
+                    for (int i = 0; i < ndata; i++) {
+                        double sc = 0.0;
+                        double ss = 0.0;
+                        for (int j = 0; j < ndata; j++) {
+                            double arg = (2.0*M_PI*i*j)/ndata;
+                            sc += bsplines_data[j]*cos(arg);
+                            ss += bsplines_data[j]*sin(arg);
+                        }
+                        moduli[i] = (float) (sc*sc+ss*ss);
+                    }
                    for (int i = 0; i < ndata; i++)
-                        modulif[i] = (float) moduli[i];
+                    {
-                    if (dim == 0)
+                        if (moduli[i] < 1.0e-7)
-                        pmeBsplineModuliX->upload(modulif);
+                            moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
-                    else if (dim == 1)
+                    }
-                        pmeBsplineModuliY->upload(modulif);
+                    if (cl.getUseDoublePrecision()) {
-                    else
+                        if (dim == 0)
-                        pmeBsplineModuliZ->upload(modulif);
+                            pmeBsplineModuliX->upload(moduli);
+                        else if (dim == 1)
+                            pmeBsplineModuliY->upload(moduli);
+                        else
+                            pmeBsplineModuliZ->upload(moduli);
+                    }
+                    else {
+                        vector<float> modulif(ndata);
+                        for (int i = 0; i < ndata; i++)
+                            modulif[i] = (float) moduli[i];
+                        if (dim == 0)
+                            pmeBsplineModuliX->upload(modulif);
+                        else if (dim == 1)
+                            pmeBsplineModuliY->upload(modulif);
+                        else
+                            pmeBsplineModuliZ->upload(modulif);
+                    }
                }
            }
        }

--- a/platforms/opencl/tests/TestOpenCLLocalEnergyMinimizer.cpp
+++ b/platforms/opencl/tests/TestOpenCLLocalEnergyMinimizer.cpp
@@ -7,7 +7,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Portions copyright (c) 2010-2014 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -170,6 +170,7 @@ void testVirtualSites() {
    VerletIntegrator integrator(0.01);
    Context context(system, integrator, platform);
    context.setPositions(positions);
+    context.applyConstraints(1e-5);
    State initialState = context.getState(State::Forces | State::Energy);
    LocalEnergyMinimizer::minimize(context, tolerance);
    State finalState = context.getState(State::Forces | State::Energy | State::Positions);

--- a/platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
+++ b/platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
@@ -751,7 +751,7 @@ void testChangingParameters() {
    ASSERT_EQUAL_TOL(clState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
 }
-void testParallelComputation(bool useCutoff) {
+void testParallelComputation(NonbondedForce::NonbondedMethod method) {
    System system;
    const int numParticles = 200;
    for (int i = 0; i < numParticles; i++)
@@ -759,9 +759,9 @@ void testParallelComputation(bool useCutoff) {
    NonbondedForce* force = new NonbondedForce();
    for (int i = 0; i < numParticles; i++)
        force->addParticle(i%2-0.5, 0.5, 1.0);
-    if (useCutoff)
+    force->setNonbondedMethod(method);
-        force->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
    system.addForce(force);
+    system.setDefaultPeriodicBoxVectors(Vec3(5,0,0), Vec3(0,5,0), Vec3(0,0,5));
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    vector<Vec3> positions(numParticles);
@@ -880,8 +880,9 @@ int main(int argc, char* argv[]) {
 //        testBlockInteractions(true);
        testDispersionCorrection();
        testChangingParameters();
-        testParallelComputation(false);
+        testParallelComputation(NonbondedForce::NoCutoff);
-        testParallelComputation(true);
+        testParallelComputation(NonbondedForce::Ewald);
+        testParallelComputation(NonbondedForce::PME);
        testSwitchingFunction(NonbondedForce::CutoffNonPeriodic);
        testSwitchingFunction(NonbondedForce::PME);
    }

--- a/platforms/reference/src/ReferenceTabulatedFunction.cpp
+++ b/platforms/reference/src/ReferenceTabulatedFunction.cpp
@@ -34,12 +34,19 @@
 #include "openmm/internal/SplineFitter.h"
 #ifdef _MSC_VER
+#if _MSC_VER < 1800
 /**
 * We need to define this ourselves, since Visual Studio is missing round() from cmath.
 * Rounds x to the nearest integer, with halfway cases rounded away from zero.
 */
 static int round(double x) {
    // The cast truncates toward zero, so the 0.5 offset must carry the sign of x;
    // otherwise negative inputs such as -0.6 would come out as 0 instead of -1.
    return (int) (x < 0 ? x-0.5 : x+0.5);
 }
+#else
+#include <cmath>
+#endif  // MSC_VER < 1800
 #else
 #include <cmath>
 #endif

--- a/platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
+++ b/platforms/reference/src/SimTKReference/ReferenceLJCoulombIxn.cpp
@@ -25,6 +25,7 @@
 #include <string.h>
 #include <sstream>
 #include <complex>
+#include <algorithm>
 #include "SimTKOpenMMCommon.h"
 #include "SimTKOpenMMLog.h"

--- a/platforms/reference/src/SimTKReference/ReferenceNeighborList.cpp
+++ b/platforms/reference/src/SimTKReference/ReferenceNeighborList.cpp
@@ -4,6 +4,7 @@
 #include <cmath>
 #include <iostream>
 #include <cassert>
+#include <algorithm>
 using namespace std;

--- a/platforms/reference/src/SimTKReference/ReferenceVariableStochasticDynamics.cpp
+++ b/platforms/reference/src/SimTKReference/ReferenceVariableStochasticDynamics.cpp
@@ -24,6 +24,7 @@
 #include <cstring>
 #include <sstream>
+#include <algorithm>
 #include "SimTKOpenMMCommon.h"
 #include "SimTKOpenMMLog.h"

--- a/platforms/reference/src/SimTKReference/ReferenceVariableVerletDynamics.cpp
+++ b/platforms/reference/src/SimTKReference/ReferenceVariableVerletDynamics.cpp
@@ -24,6 +24,7 @@
 #include <string.h>
 #include <sstream>
+#include <algorithm>
 #include "SimTKOpenMMCommon.h"
 #include "SimTKOpenMMLog.h"

--- a/platforms/reference/tests/TestReferenceLocalEnergyMinimizer.cpp
+++ b/platforms/reference/tests/TestReferenceLocalEnergyMinimizer.cpp
@@ -7,7 +7,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Portions copyright (c) 2010-2014 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -171,6 +171,7 @@ void testVirtualSites() {
    VerletIntegrator integrator(0.01);
    Context context(system, integrator, platform);
    context.setPositions(positions);
+    context.applyConstraints(1e-5);
    State initialState = context.getState(State::Forces | State::Energy);
    LocalEnergyMinimizer::minimize(context, tolerance);
    State finalState = context.getState(State::Forces | State::Energy | State::Positions);

--- a/plugins/drude/openmmapi/src/DrudeForceImpl.cpp
+++ b/plugins/drude/openmmapi/src/DrudeForceImpl.cpp
@@ -141,7 +141,8 @@ void DrudeForceImpl::initialize(ContextImpl& context) {
 }
 double DrudeForceImpl::calcForcesAndEnergy(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
-    return kernel.getAs<CalcDrudeForceKernel>().execute(context, includeForces, includeEnergy);
+    // Run the kernel only when this force's group bit is set in the requested group mask.
+    if ((groups&(1<<owner.getForceGroup())) != 0)
+        return kernel.getAs<CalcDrudeForceKernel>().execute(context, includeForces, includeEnergy);
+    // Group not selected: report zero energy instead of falling off the end of a
+    // non-void function, which is undefined behavior.
+    return 0.0;
 }
 std::vector<std::string> DrudeForceImpl::getKernelNames() {