Merge pull request #1837 from peastman/cv

Created CustomCVForce

Merge pull request #1837 from peastman/cv
Created CustomCVForce
7cdd6d16 · peastman · GitHub · ad5cc98c · 711c3a5a · 7cdd6d16
Commit 7cdd6d16 authored Jun 28, 2017 by peastman Committed by GitHub Jun 28, 2017
20 changed files
--- a/platforms/cuda/src/CudaPlatform.cpp
+++ b/platforms/cuda/src/CudaPlatform.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2016 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2017 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -91,6 +91,7 @@ CudaPlatform::CudaPlatform() {
    registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
+    registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
    registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
@@ -198,7 +199,23 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
    if (threadsEnv != NULL)
        stringstream(threadsEnv) >> threads;
    context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
-            hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads));
+            hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads, NULL));
+}
+/**
+ * Create the platform data for a context that is linked to an existing one.
+ *
+ * Every platform property, and the thread count, is read back from the original context
+ * and passed on unchanged; the original context itself is handed to PlatformData as well.
+ *
+ * @param context          the newly created context that needs platform data
+ * @param originalContext  the existing context whose settings are reused
+ */
+void CudaPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
+    Platform& platform = originalContext.getPlatform();
+    const Context& owner = originalContext.getOwner();
+    string deviceIndex = platform.getPropertyValue(owner, CudaDeviceIndex());
+    string blockingSync = platform.getPropertyValue(owner, CudaUseBlockingSync());
+    string precision = platform.getPropertyValue(owner, CudaPrecision());
+    string cpuPme = platform.getPropertyValue(owner, CudaUseCpuPme());
+    string compiler = platform.getPropertyValue(owner, CudaCompiler());
+    string tempDirectory = platform.getPropertyValue(owner, CudaTempDirectory());
+    string hostCompiler = platform.getPropertyValue(owner, CudaHostCompiler());
+    string disablePmeStream = platform.getPropertyValue(owner, CudaDisablePmeStream());
+    string deterministicForces = platform.getPropertyValue(owner, CudaDeterministicForces());
+    PlatformData* originalData = reinterpret_cast<PlatformData*>(originalContext.getPlatformData());
+    int numThreads = originalData->threads.getNumThreads();
+    PlatformData* linkedData = new PlatformData(&context, context.getSystem(), deviceIndex, blockingSync, precision, cpuPme, compiler, tempDirectory,
+            hostCompiler, disablePmeStream, deterministicForces, numThreads, &originalContext);
+    context.setPlatformData(linkedData);
 }
 void CudaPlatform::contextDestroyed(ContextImpl& context) const {
@@ -208,7 +225,7 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
 CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
            const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty,
-            const string& deterministicForcesProperty, int numThreads) :
+            const string& deterministicForcesProperty, int numThreads, ContextImpl* originalContext) :
                context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
    bool blocking = (blockingProperty == "true");
    vector<string> devices;
@@ -218,16 +235,19 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
        searchPos = nextPos+1;
    }
    devices.push_back(deviceIndexProperty.substr(searchPos));
+    PlatformData* originalData = NULL;
+    if (originalContext != NULL)
+        originalData = reinterpret_cast<PlatformData*>(originalContext->getPlatformData());
    try {
        for (int i = 0; i < (int) devices.size(); i++) {
            if (devices[i].length() > 0) {
                int deviceIndex;
                stringstream(devices[i]) >> deviceIndex;
-                contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this));
+                contexts.push_back(new CudaContext(system, deviceIndex, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
            }
        }
        if (contexts.size() == 0)
-            contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this));
+            contexts.push_back(new CudaContext(system, -1, blocking, precisionProperty, compilerProperty, tempProperty, hostCompilerProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
    }
    catch (...) {
        // If an exception was thrown, do our best to clean up memory.

--- a/platforms/cuda/src/kernels/customCVForce.cu
+++ b/platforms/cuda/src/kernels/customCVForce.cu
+/**
+ * Copy the positions and velocities to the inner context.
+ *
+ * For every source slot the destination slot is looked up as
+ * innerInvAtomOrder[atomOrder[slot]]. The position correction array is only copied when
+ * USE_MIXED_PRECISION is defined.
+ */
+extern "C" __global__ void copyState(real4* posq, real4* posqCorrection, mixed4* velm, int* __restrict__ atomOrder,
+        real4* innerPosq, real4* innerPosqCorrection, mixed4* innerVelm, int* __restrict__ innerInvAtomOrder,
+        int numAtoms) {
+    const int stride = blockDim.x*gridDim.x;
+    for (int src = blockIdx.x*blockDim.x+threadIdx.x; src < numAtoms; src += stride) {
+        const int dst = innerInvAtomOrder[atomOrder[src]];
+        innerPosq[dst] = posq[src];
+        innerVelm[dst] = velm[src];
+#ifdef USE_MIXED_PRECISION
+        innerPosqCorrection[dst] = posqCorrection[src];
+#endif
+    }
+}
+/**
+ * Copy the forces back to the main context.
+ *
+ * Both force arrays hold three consecutive sections of paddedNumAtoms elements each. The
+ * element at inner slot i of each section is written to slot invAtomOrder[innerAtomOrder[i]]
+ * of the matching section in the destination array.
+ */
+extern "C" __global__ void copyForces(long long* forces, int* __restrict__ invAtomOrder, long long* innerForces,
+        int* __restrict__ innerAtomOrder, int numAtoms, int paddedNumAtoms) {
+    const int stride = blockDim.x*gridDim.x;
+    for (int src = blockIdx.x*blockDim.x+threadIdx.x; src < numAtoms; src += stride) {
+        const int dst = invAtomOrder[innerAtomOrder[src]];
+        for (int component = 0; component < 3; component++)
+            forces[dst+paddedNumAtoms*component] = innerForces[src+paddedNumAtoms*component];
+    }
+}
+/**
+ * Add all the forces from the CVs.
+ *
+ * PARAMETER_ARGUMENTS and ADD_FORCES are not defined in this file; presumably the host
+ * code substitutes generated text for them before the kernel is compiled (TODO confirm
+ * against the CUDA host code). Because the substituted statements may refer to the loop
+ * variable i and to forces, those names should not be changed here.
+ *
+ * @param forces      the force buffer (presumably the one ADD_FORCES accumulates into)
+ * @param bufferSize  the number of loop iterations shared among all threads
+ */
+extern "C" __global__ void addForces(long long* forces, int bufferSize
+    PARAMETER_ARGUMENTS) {
+    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < bufferSize; i += blockDim.x*gridDim.x) {
+        ADD_FORCES
+    }
+}
--- a/platforms/cuda/src/kernels/utilities.cu
+++ b/platforms/cuda/src/kernels/utilities.cu
@@ -73,6 +73,25 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
    clearSingleBuffer(buffer6, size6);
 }
+/**
+ * Sum the energy buffer.
+ *
+ * Each thread first adds up elements threadIdx.x, threadIdx.x+blockDim.x, ... of
+ * energyBuffer. The per-thread totals are then combined pairwise in shared memory and
+ * thread 0 writes the final value to *result. The element index never includes blockIdx,
+ * so every block would repeat the same work; presumably this kernel is launched with a
+ * single block (TODO confirm against the launch code).
+ *
+ * @param energyBuffer   the values to sum
+ * @param result         receives the total
+ * @param bufferSize     the number of elements in energyBuffer
+ * @param workGroupSize  the number of threads taking part in the pairwise reduction.
+ *                       NOTE(review): looks like this must equal blockDim.x, and the
+ *                       dynamic shared memory must hold that many elements - confirm.
+ */
+__global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __restrict__ result, int bufferSize, int workGroupSize) {
+    extern __shared__ mixed tempBuffer[]; // size is supplied at launch time
+    const unsigned int thread = threadIdx.x;
+    mixed sum = 0;
+    for (unsigned int index = thread; index < bufferSize; index += blockDim.x)
+        sum += energyBuffer[index];
+    tempBuffer[thread] = sum;
+    for (int i = 1; i < workGroupSize; i *= 2) {
+        __syncthreads(); // make the previous round's writes visible before reading a neighbor's slot
+        if (thread%(i*2) == 0 && thread+i < workGroupSize)
+            tempBuffer[thread] += tempBuffer[thread+i];
+    }
+    if (thread == 0) // thread 0 performed the last addition itself, so no further barrier is needed
+        *result = tempBuffer[0];
+}
 /**
 * Record the atomic charges into the posq array.
 */

--- a/platforms/cuda/tests/TestCudaCustomCVForce.cpp
+++ b/platforms/cuda/tests/TestCudaCustomCVForce.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2017 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+#include "CudaTests.h"
+#include "TestCustomCVForce.h"
+void runPlatformTests() { // Intentionally empty: this file defines no CUDA-specific CustomCVForce tests.
+}
--- a/platforms/cuda/tests/TestCudaFFT3D.cpp
+++ b/platforms/cuda/tests/TestCudaFFT3D.cpp
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
    system.addParticle(0.0);
    CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
            platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
-            platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1);
+            platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1, NULL);
    CudaContext& context = *platformData.contexts[0];
    context.initialize();
    OpenMM_SFMT::SFMT sfmt;

--- a/platforms/cuda/tests/TestCudaRandom.cpp
+++ b/platforms/cuda/tests/TestCudaRandom.cpp
@@ -56,7 +56,7 @@ void testGaussian() {
        system.addParticle(1.0);
    CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
            platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
-            platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1);
+            platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1, NULL);
    CudaContext& context = *platformData.contexts[0];
    context.initialize();
    context.getIntegrationUtilities().initRandomNumberGenerator(0);

--- a/platforms/cuda/tests/TestCudaSort.cpp
+++ b/platforms/cuda/tests/TestCudaSort.cpp
@@ -66,7 +66,7 @@ void verifySorting(vector<float> array) {
    system.addParticle(0.0);
    CudaPlatform::PlatformData platformData(NULL, system, "", "true", platform.getPropertyDefaultValue("CudaPrecision"), "false",
            platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()),
-            platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1);
+            platform.getPropertyDefaultValue(CudaPlatform::CudaHostCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaDisablePmeStream()), "false", 1, NULL);
    CudaContext& context = *platformData.contexts[0];
    context.initialize();
    CudaArray data(context, array.size(), 4, "sortData");

--- a/platforms/opencl/include/OpenCLContext.h
+++ b/platforms/opencl/include/OpenCLContext.h
@@ -163,7 +163,8 @@ public:
    class ForcePostComputation;
    static const int ThreadBlockSize;
    static const int TileSize;
-    OpenCLContext(const System& system, int platformIndex, int deviceIndex, const std::string& precision, OpenCLPlatform::PlatformData& platformData);
+    OpenCLContext(const System& system, int platformIndex, int deviceIndex, const std::string& precision, OpenCLPlatform::PlatformData& platformData,
+        OpenCLContext* originalContext);
    ~OpenCLContext();
    /**
     * This is called to initialize internal data structures after all Forces in the system
@@ -363,9 +364,13 @@ public:
     */
    void reduceBuffer(OpenCLArray& array, int numBuffers);
    /**
-     * Sum the buffesr containing forces.
+     * Sum the buffers containing forces.
     */
    void reduceForces();
+    /**
+     * Sum the buffer containing energy.
+     */
+    double reduceEnergy();
    /**
     * Get the current simulation time.
     */
@@ -749,6 +754,7 @@ private:
    cl::Kernel clearSixBuffersKernel;
    cl::Kernel reduceReal4Kernel;
    cl::Kernel reduceForcesKernel;
+    cl::Kernel reduceEnergyKernel;
    cl::Kernel setChargesKernel;
    std::vector<OpenCLForceInfo*> forces;
    std::vector<Molecule> molecules;
@@ -763,6 +769,7 @@ private:
    OpenCLArray* forceBuffers;
    OpenCLArray* longForceBuffer;
    OpenCLArray* energyBuffer;
+    OpenCLArray* energySum;
    OpenCLArray* energyParamDerivBuffer;
    OpenCLArray* atomIndexDevice;
    OpenCLArray* chargeBuffer;

--- a/platforms/opencl/include/OpenCLKernels.h
+++ b/platforms/opencl/include/OpenCLKernels.h
@@ -37,6 +37,7 @@
 #include "openmm/internal/CompiledExpressionSet.h"
 #include "openmm/internal/CustomIntegratorUtilities.h"
 #include "lepton/CompiledExpression.h"
+#include "lepton/ExpressionProgram.h"
 #include "openmm/System.h"
 namespace OpenMM {
@@ -1207,6 +1208,54 @@ private:
    cl::Kernel framesKernel, blockBoundsKernel, neighborsKernel, forceKernel, torqueKernel;
 };
+/**
+ * This kernel is invoked by CustomCVForce to calculate the forces acting on the system and the energy of the system.
+ *
+ * The kernel owns the device arrays it allocates (cvForces, invAtomOrder, innerInvAtomOrder)
+ * and releases them in its destructor.
+ */
+class OpenCLCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
+public:
+    OpenCLCalcCustomCVForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcCustomCVForceKernel(name, platform),
+            cl(cl), hasInitializedKernels(false), invAtomOrder(NULL), innerInvAtomOrder(NULL) {
+    }
+    ~OpenCLCalcCustomCVForceKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param force      the CustomCVForce this kernel will be used for
+     * @param innerContext   the context created by the CustomCVForce for computing collective variables
+     */
+    void initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext);
+    /**
+     * Execute the kernel to calculate the forces and/or energy.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param innerContext   the context created by the CustomCVForce for computing collective variables
+     * @param includeForces  true if forces should be calculated
+     * @param includeEnergy  true if the energy should be calculated
+     * @return the potential energy due to the force
+     */
+    double execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy);
+    /**
+     * Copy state information to the inner context.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param innerContext   the context created by the CustomCVForce for computing collective variables
+     */
+    void copyState(ContextImpl& context, ContextImpl& innerContext);
+private:
+    class ReorderListener; // rebuilds an inverse atom order array; defined in OpenCLKernels.cpp
+    OpenCLContext& cl; // the context this kernel was created for
+    bool hasInitializedKernels; // false until the first call to copyState()
+    Lepton::ExpressionProgram energyExpression; // compiled form of the force's energy function
+    std::vector<std::string> variableNames, paramDerivNames, globalParameterNames; // filled in by initialize()
+    std::vector<Lepton::ExpressionProgram> variableDerivExpressions; // derivative of the energy with respect to each collective variable
+    std::vector<Lepton::ExpressionProgram> paramDerivExpressions; // derivative of the energy with respect to each name in paramDerivNames
+    std::vector<OpenCLArray*> cvForces; // one array per collective variable, deleted in the destructor
+    OpenCLArray* invAtomOrder; // allocated in initialize(), deleted in the destructor
+    OpenCLArray* innerInvAtomOrder; // allocated in initialize(), deleted in the destructor
+    cl::Kernel copyStateKernel, copyForcesKernel, addForcesKernel; // created in initialize()
+};
 /**
 * This kernel is invoked by VerletIntegrator to take one time step.
 */

--- a/platforms/opencl/include/OpenCLPlatform.h
+++ b/platforms/opencl/include/OpenCLPlatform.h
@@ -53,6 +53,7 @@ public:
    const std::string& getPropertyValue(const Context& context, const std::string& property) const;
    void setPropertyValue(Context& context, const std::string& property, const std::string& value) const;
    void contextCreated(ContextImpl& context, const std::map<std::string, std::string>& properties) const;
+    void linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const;
    void contextDestroyed(ContextImpl& context) const;
    /**
     * This is the name of the parameter for selecting which OpenCL device or devices to use.
@@ -108,7 +109,7 @@ public:
 class OPENMM_EXPORT_OPENCL OpenCLPlatform::PlatformData {
 public:
    PlatformData(const System& system, const std::string& platformPropValue, const std::string& deviceIndexProperty, const std::string& precisionProperty,
-            const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads);
+            const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads, ContextImpl* originalContext);
    ~PlatformData();
    void initializeContexts(const System& system);
    void syncContexts();

--- a/platforms/opencl/src/OpenCLContext.cpp
+++ b/platforms/opencl/src/OpenCLContext.cpp
@@ -67,9 +67,9 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
    std::cerr << "OpenCL internal error: " << errinfo << std::endl;
 }
-OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData) :
+OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, const string& precision, OpenCLPlatform::PlatformData& platformData, OpenCLContext* originalContext) :
        system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), atomsWereReordered(false), posq(NULL),
-        posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL),
+        posqCorrection(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), energySum(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL),
        chargeBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
    if (precision == "single") {
        useDoublePrecision = false;
@@ -261,8 +261,14 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
        vector<cl::Device> contextDevices;
        contextDevices.push_back(device);
        cl_context_properties cprops[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms[bestPlatform](), 0};
-        context = cl::Context(contextDevices, cprops, errorCallback);
+        if (originalContext == NULL) {
-        defaultQueue = cl::CommandQueue(context, device);
+            context = cl::Context(contextDevices, cprops, errorCallback);
+            defaultQueue = cl::CommandQueue(context, device);
+        }
+        else {
+            context = originalContext->context;
+            defaultQueue = originalContext->defaultQueue;
+        }
        currentQueue = defaultQueue;
        numAtoms = system.getNumParticles();
        paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize);
@@ -309,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
    reduceReal4Kernel = cl::Kernel(utilities, "reduceReal4Buffer");
    if (supports64BitGlobalAtomics)
        reduceForcesKernel = cl::Kernel(utilities, "reduceForces");
+    reduceEnergyKernel = cl::Kernel(utilities, "reduceEnergy");
    setChargesKernel = cl::Kernel(utilities, "setCharges");
    // Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
@@ -436,6 +443,8 @@ OpenCLContext::~OpenCLContext() {
        delete longForceBuffer;
    if (energyBuffer != NULL)
        delete energyBuffer;
+    if (energySum != NULL)
+        delete energySum;
    if (energyParamDerivBuffer != NULL)
        delete energyParamDerivBuffer;
    if (atomIndexDevice != NULL)
@@ -465,11 +474,19 @@ void OpenCLContext::initialize() {
        forceBuffers = OpenCLArray::create<mm_double4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
        force = OpenCLArray::create<mm_double4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
        energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
+        energySum = OpenCLArray::create<cl_double>(*this, 1, "energySum");
    }
-    else {
+    else if (useMixedPrecision) {
        forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
        force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
        energyBuffer = OpenCLArray::create<cl_double>(*this, energyBufferSize, "energyBuffer");
+        energySum = OpenCLArray::create<cl_double>(*this, 1, "energySum");
+    }
+    else {
+        forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
+        force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
+        energyBuffer = OpenCLArray::create<cl_float>(*this, energyBufferSize, "energyBuffer");
+        energySum = OpenCLArray::create<cl_float>(*this, 1, "energySum");
    }
    if (supports64BitGlobalAtomics) {
        longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
@@ -750,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
    executeKernel(reduceReal4Kernel, bufferSize, 128);
 }
+/**
+ * Sum the energy buffer on the device and return the total.
+ *
+ * The reduceEnergy kernel is run with a work group size capped at 512 and leaves its
+ * result in the one-element energySum array, which is then downloaded.
+ *
+ * @return the sum of all elements of the energy buffer
+ */
+double OpenCLContext::reduceEnergy() {
+    const int maxGroupSize = 512;
+    int groupSize = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
+    if (groupSize > maxGroupSize)
+        groupSize = maxGroupSize;
+    reduceEnergyKernel.setArg<cl::Buffer>(0, energyBuffer->getDeviceBuffer());
+    reduceEnergyKernel.setArg<cl::Buffer>(1, energySum->getDeviceBuffer());
+    reduceEnergyKernel.setArg<cl_int>(2, energyBuffer->getSize());
+    reduceEnergyKernel.setArg<cl_int>(3, groupSize);
+    // Scratch space for the reduction: one energy element per work item.
+    reduceEnergyKernel.setArg(4, groupSize*energyBuffer->getElementSize(), NULL);
+    executeKernel(reduceEnergyKernel, groupSize, groupSize);
+    // energySum holds a float in single precision and a double otherwise.
+    if (!getUseDoublePrecision() && !getUseMixedPrecision()) {
+        float total;
+        energySum->download(&total);
+        return total;
+    }
+    double total;
+    energySum->download(&total);
+    return total;
+}
 void OpenCLContext::setCharges(const vector<double>& charges) {
    if (chargeBuffer == NULL)
        chargeBuffer = new OpenCLArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");

--- a/platforms/opencl/src/OpenCLKernelFactory.cpp
+++ b/platforms/opencl/src/OpenCLKernelFactory.cpp
@@ -106,6 +106,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
        return new OpenCLCalcCustomCentroidBondForceKernel(name, platform, cl, context.getSystem());
    if (name == CalcCustomCompoundBondForceKernel::Name())
        return new OpenCLCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
+    if (name == CalcCustomCVForceKernel::Name())
+        return new OpenCLCalcCustomCVForceKernel(name, platform, cl);
    if (name == CalcCustomManyParticleForceKernel::Name())
        return new OpenCLCalcCustomManyParticleForceKernel(name, platform, cl, context.getSystem());
    if (name == CalcGayBerneForceKernel::Name())

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -139,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
        sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
    cl.reduceForces();
    cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
-    if (includeEnergy) {
+    if (includeEnergy)
-        OpenCLArray& energyArray = cl.getEnergyBuffer();
+        sum += cl.reduceEnergy();
-        if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
-            double* energy = (double*) cl.getPinnedBuffer();
-            energyArray.download(energy);
-            for (int i = 0; i < energyArray.getSize(); i++)
-                sum += energy[i];
-        }
-        else {
-            float* energy = (float*) cl.getPinnedBuffer();
-            energyArray.download(energy);
-            for (int i = 0; i < energyArray.getSize(); i++)
-                sum += energy[i];
-        }
-    }
    if (!cl.getForcesValid())
        valid = false;
    return sum;
@@ -6874,6 +6861,191 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
    exclusionStartIndex->upload(startIndexVec);
 }
+/**
+ * Listener that rebuilds an inverse atom order array from the context's current atom
+ * index and uploads it to the device each time it is executed.
+ */
+class OpenCLCalcCustomCVForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
+public:
+    ReorderListener(OpenCLContext& cl, OpenCLArray& invAtomOrder) : cl(cl), invAtomOrder(invAtomOrder) {
+    }
+    void execute() {
+        vector<cl_int> inverse(cl.getPaddedNumAtoms());
+        const vector<int>& atomIndex = cl.getAtomIndex();
+        // Invert the mapping: if slot s holds atom a, then inverse[a] = s.
+        const int numEntries = (int) atomIndex.size();
+        for (int slot = 0; slot < numEntries; slot++)
+            inverse[atomIndex[slot]] = slot;
+        invAtomOrder.upload(inverse);
+    }
+private:
+    OpenCLContext& cl;
+    OpenCLArray& invAtomOrder;
+};
+/**
+ * Release every device array owned by this kernel.
+ */
+OpenCLCalcCustomCVForceKernel::~OpenCLCalcCustomCVForceKernel() {
+    for (OpenCLArray* array : cvForces)
+        delete array;
+    // Deleting a null pointer does nothing, so no explicit checks are required.
+    delete invAtomOrder;
+    delete innerInvAtomOrder;
+}
+/**
+ * Initialize the kernel: record the names used by the energy function, compile the energy
+ * expression and its derivatives, allocate the device arrays and build the OpenCL kernels.
+ *
+ * @param system        the System this kernel will be applied to (not used by this method)
+ * @param force         the CustomCVForce this kernel will be used for
+ * @param innerContext  the context created by the CustomCVForce for computing collective
+ *                      variables; its energy parameter derivative names are registered
+ *                      with this kernel's context too
+ */
+void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
+    int numCVs = force.getNumCollectiveVariables();
+    cl.addForce(new OpenCLForceInfo(1));
+    for (int i = 0; i < force.getNumGlobalParameters(); i++)
+        globalParameterNames.push_back(force.getGlobalParameterName(i));
+    // Create custom functions for the tabulated functions. They are heap allocated and owned
+    // by this method, so they are released even if building an expression throws.
+    map<string, Lepton::CustomFunction*> functions;
+    try {
+        for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
+            functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
+        // Create the expressions.
+        Lepton::ParsedExpression energyExpr = Lepton::Parser::parse(force.getEnergyFunction(), functions);
+        energyExpression = energyExpr.createProgram();
+        for (int i = 0; i < numCVs; i++) {
+            string name = force.getCollectiveVariableName(i);
+            variableNames.push_back(name);
+            variableDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
+        }
+        for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
+            string name = force.getEnergyParameterDerivativeName(i);
+            paramDerivNames.push_back(name);
+            paramDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
+            cl.addEnergyParameterDerivative(name);
+        }
+    }
+    catch (...) {
+        // Do not leak the custom functions when an expression cannot be built.
+        for (auto& function : functions)
+            delete function.second;
+        throw;
+    }
+    // Delete the custom functions.
+    for (auto& function : functions)
+        delete function.second;
+    // Copy parameter derivatives from the inner context.
+    OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
+    for (auto& param : cl2.getEnergyParamDerivNames())
+        cl.addEnergyParameterDerivative(param);
+    // Create arrays for storing information.
+    int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
+    for (int i = 0; i < numCVs; i++)
+        cvForces.push_back(new OpenCLArray(cl, cl.getNumAtoms(), 4*elementSize, "cvForce"));
+    invAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder");
+    innerInvAtomOrder = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder");
+    // Create the kernels. The addForces kernel receives one force array and one scale
+    // factor per collective variable, so its argument list and body are generated here.
+    stringstream args, add;
+    for (int i = 0; i < numCVs; i++) {
+        args << ", __global real4* restrict force" << i << ", real dEdV" << i;
+        add << "f += force" << i << "[i]*dEdV" << i << ";\n";
+    }
+    map<string, string> replacements;
+    replacements["PARAMETER_ARGUMENTS"] = args.str();
+    replacements["ADD_FORCES"] = add.str();
+    cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customCVForce, replacements));
+    copyStateKernel = cl::Kernel(program, "copyState");
+    copyForcesKernel = cl::Kernel(program, "copyForces");
+    addForcesKernel = cl::Kernel(program, "addForces");
+}
+/**
+ * Evaluate the collective variables in the inner context and combine them into the energy,
+ * forces and energy parameter derivatives of this force.
+ *
+ * @param context        the context in which to execute this kernel
+ * @param innerContext   the context created by the CustomCVForce for computing collective variables
+ * @param includeForces  not consulted by this method; forces are always computed
+ * @param includeEnergy  not consulted by this method; the energy is always computed
+ * @return the value of the energy expression
+ */
+double OpenCLCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
+    copyState(context, innerContext);
+    int numCVs = variableNames.size();
+    int numAtoms = cl.getNumAtoms();
+    // Evaluate collective variable i by computing the inner context with group mask 1<<i,
+    // then save its value, its forces and its parameter derivatives.
+    vector<double> cvValues;
+    vector<map<string, double> > cvDerivs(numCVs);
+    for (int i = 0; i < numCVs; i++) {
+        cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
+        copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i]->getDeviceBuffer());
+        cl.executeKernel(copyForcesKernel, numAtoms);
+        innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
+    }
+    // Compute the energy and forces.
+    map<string, double> variables;
+    for (auto& name : globalParameterNames)
+        variables[name] = context.getParameter(name);
+    for (int i = 0; i < numCVs; i++)
+        variables[variableNames[i]] = cvValues[i];
+    double energy = energyExpression.evaluate(variables);
+    // Evaluate each dE/dCV once; it is needed both as a kernel argument and for the
+    // chain rule below.
+    vector<double> dEdV(numCVs);
+    for (int i = 0; i < numCVs; i++) {
+        dEdV[i] = variableDerivExpressions[i].evaluate(variables);
+        if (cl.getUseDoublePrecision())
+            addForcesKernel.setArg<cl_double>(2*i+3, dEdV[i]);
+        else
+            addForcesKernel.setArg<cl_float>(2*i+3, (cl_float) dEdV[i]);
+    }
+    cl.executeKernel(addForcesKernel, numAtoms);
+    // Compute the energy parameter derivatives.
+    map<string, double>& energyParamDerivs = cl.getEnergyParamDerivWorkspace();
+    for (int i = 0; i < (int) paramDerivExpressions.size(); i++)
+        energyParamDerivs[paramDerivNames[i]] += paramDerivExpressions[i].evaluate(variables);
+    // Chain rule: dE/dparam += dE/dCV * dCV/dparam for every collective variable.
+    for (int i = 0; i < numCVs; i++) {
+        for (auto& deriv : cvDerivs[i])
+            energyParamDerivs[deriv.first] += dEdV[i]*deriv.second;
+    }
+    return energy;
+}
+/**
+ * Bring the inner context up to date with the outer one.  On the first call this also
+ * registers the reorder listeners and binds the arguments of the three kernels that do
+ * not change between calls.  Every call then copies positions and velocities on the
+ * device and mirrors the periodic box vectors, the time and the inner context's
+ * parameters from the outer context.
+ *
+ * @param context       the context to copy state from
+ * @param innerContext  the context to copy state into
+ */
+void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
+    int atomCount = cl.getNumAtoms();
+    OpenCLPlatform::PlatformData* innerData = reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData());
+    OpenCLContext& innerCl = *innerData->contexts[0];
+    if (!hasInitializedKernels) {
+        hasInitializedKernels = true;
+        // Attach one listener to each context, then run both once immediately.
+        ReorderListener* outerListener = new ReorderListener(cl, *invAtomOrder);
+        ReorderListener* innerListener = new ReorderListener(innerCl, *innerInvAtomOrder);
+        cl.addReorderListener(outerListener);
+        innerCl.addReorderListener(innerListener);
+        outerListener->execute();
+        innerListener->execute();
+        // copyState kernel: arguments 0-3 come from the outer context, 4-7 from the inner one.
+        copyStateKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
+        if (cl.getUseMixedPrecision())
+            copyStateKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
+        else
+            copyStateKernel.setArg<void*>(1, NULL);
+        copyStateKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
+        copyStateKernel.setArg<cl::Buffer>(3, cl.getAtomIndexArray().getDeviceBuffer());
+        copyStateKernel.setArg<cl::Buffer>(4, innerCl.getPosq().getDeviceBuffer());
+        if (cl.getUseMixedPrecision())
+            copyStateKernel.setArg<cl::Buffer>(5, innerCl.getPosqCorrection().getDeviceBuffer());
+        else
+            copyStateKernel.setArg<void*>(5, NULL);
+        copyStateKernel.setArg<cl::Buffer>(6, innerCl.getVelm().getDeviceBuffer());
+        copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder->getDeviceBuffer());
+        copyStateKernel.setArg<cl_int>(8, atomCount);
+        // copyForces kernel: argument 0 (the destination array) is bound by execute().
+        copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder->getDeviceBuffer());
+        copyForcesKernel.setArg<cl::Buffer>(2, innerCl.getForce().getDeviceBuffer());
+        copyForcesKernel.setArg<cl::Buffer>(3, innerCl.getAtomIndexArray().getDeviceBuffer());
+        copyForcesKernel.setArg<cl_int>(4, atomCount);
+        // addForces kernel: the per-variable force buffers occupy the even slots from 2 on;
+        // the odd slots in between are bound by execute().
+        addForcesKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
+        addForcesKernel.setArg<cl_int>(1, atomCount);
+        for (int cv = 0; cv < cvForces.size(); cv++)
+            addForcesKernel.setArg<cl::Buffer>(2*cv+2, cvForces[cv]->getDeviceBuffer());
+    }
+    // Copy positions and velocities on the device.
+    cl.executeKernel(copyStateKernel, atomCount);
+    // Mirror the box, the time and the inner context's parameters.
+    Vec3 boxA, boxB, boxC;
+    context.getPeriodicBoxVectors(boxA, boxB, boxC);
+    innerContext.setPeriodicBoxVectors(boxA, boxB, boxC);
+    innerContext.setTime(context.getTime());
+    map<string, double> innerParameters = innerContext.getParameters();
+    for (map<string, double>::const_iterator entry = innerParameters.begin(); entry != innerParameters.end(); ++entry)
+        innerContext.setParameter(entry->first, context.getParameter(entry->first));
+}
 OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
 }

--- a/platforms/opencl/src/OpenCLPlatform.cpp
+++ b/platforms/opencl/src/OpenCLPlatform.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2016 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2017 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -82,6 +82,7 @@ OpenCLPlatform::OpenCLPlatform() {
    registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
+    registerKernelFactory(CalcCustomCVForceKernel::Name(), factory);
    registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
    registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
@@ -179,7 +180,20 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
    char* threadsEnv = getenv("OPENMM_CPU_THREADS");
    if (threadsEnv != NULL)
        stringstream(threadsEnv) >> threads;
-    context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue, pmeStreamPropValue, threads));
+    context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue,
+            pmeStreamPropValue, threads, NULL));
+}
+/**
+ * Create the platform data for a context that is linked to an existing one.  Every
+ * property value and the thread count are read from the original context, and the
+ * original context is handed to the new PlatformData.
+ *
+ * @param context          the newly created linked context
+ * @param originalContext  the context it is linked to
+ */
+void OpenCLPlatform::linkedContextCreated(ContextImpl& context, ContextImpl& originalContext) const {
+    PlatformData* originalData = reinterpret_cast<PlatformData*>(originalContext.getPlatformData());
+    int numThreads = originalData->threads.getNumThreads();
+    // Read the property values from the platform of the original context.
+    Platform& sourcePlatform = originalContext.getPlatform();
+    string platformIndexValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLPlatformIndex());
+    string deviceIndexValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLDeviceIndex());
+    string precisionValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLPrecision());
+    string cpuPmeValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLUseCpuPme());
+    string pmeStreamValue = sourcePlatform.getPropertyValue(originalContext.getOwner(), OpenCLDisablePmeStream());
+    PlatformData* linkedData = new PlatformData(context.getSystem(), platformIndexValue, deviceIndexValue, precisionValue,
+            cpuPmeValue, pmeStreamValue, numThreads, &originalContext);
+    context.setPlatformData(linkedData);
 }
 void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
@@ -188,7 +202,7 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
 }
 OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& platformPropValue, const string& deviceIndexProperty,
-        const string& precisionProperty, const string& cpuPmeProperty, const string& pmeStreamProperty, int numThreads) :
+        const string& precisionProperty, const string& cpuPmeProperty, const string& pmeStreamProperty, int numThreads, ContextImpl* originalContext) :
            removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads)  {
    int platformIndex = -1;
    if (platformPropValue.length() > 0)
@@ -200,16 +214,19 @@ OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& p
        searchPos = nextPos+1;
    }
    devices.push_back(deviceIndexProperty.substr(searchPos));
+    PlatformData* originalData = NULL;
+    if (originalContext != NULL)
+        originalData = reinterpret_cast<PlatformData*>(originalContext->getPlatformData());
    try {
        for (int i = 0; i < (int) devices.size(); i++) {
            if (devices[i].length() > 0) {
                int deviceIndex;
                stringstream(devices[i]) >> deviceIndex;
-                contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this));
+                contexts.push_back(new OpenCLContext(system, platformIndex, deviceIndex, precisionProperty, *this, (originalData == NULL ? NULL : originalData->contexts[i])));
            }
        }
        if (contexts.size() == 0)
-            contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this));
+            contexts.push_back(new OpenCLContext(system, platformIndex, -1, precisionProperty, *this, (originalData == NULL ? NULL : originalData->contexts[0])));
    }
    catch (...) {
        // If an exception was thrown, do our best to clean up memory.

--- a/platforms/opencl/src/kernels/customCVForce.cl
+++ b/platforms/opencl/src/kernels/customCVForce.cl
+/**
+ * Copy positions and velocities into the inner context's arrays.  Entry i of the
+ * source arrays is written to slot innerInvAtomOrder[atomOrder[i]] of the inner
+ * arrays.  The position corrections are copied too when USE_MIXED_PRECISION is defined.
+ */
+__kernel void copyState(__global real4* posq, __global real4* posqCorrection, __global mixed4* velm, __global int* restrict atomOrder,
+        __global real4* innerPosq, __global real4* innerPosqCorrection, __global mixed4* innerVelm, __global int* restrict innerInvAtomOrder,
+        int numAtoms) {
+    int source = get_global_id(0);
+    while (source < numAtoms) {
+        const int target = innerInvAtomOrder[atomOrder[source]];
+        innerPosq[target] = posq[source];
+        innerVelm[target] = velm[source];
+#ifdef USE_MIXED_PRECISION
+        innerPosqCorrection[target] = posqCorrection[source];
+#endif
+        source += get_global_size(0);
+    }
+}
+/**
+ * Copy the inner context's forces back for use by the main context.  Entry i of
+ * innerForces is stored at slot invAtomOrder[innerAtomOrder[i]] of forces.
+ */
+__kernel void copyForces(__global real4* forces, __global int* restrict invAtomOrder, __global real4* innerForces,
+        __global int* restrict innerAtomOrder, int numAtoms) {
+    int source = get_global_id(0);
+    while (source < numAtoms) {
+        const int target = invAtomOrder[innerAtomOrder[source]];
+        forces[target] = innerForces[source];
+        source += get_global_size(0);
+    }
+}
+/**
+ * Add all the forces from the CVs.
+ *
+ * PARAMETER_ARGUMENTS and ADD_FORCES are placeholders that the host code replaces
+ * before compiling.  For each collective variable N the host appends the arguments
+ * "__global real4* restrict forceN, real dEdVN" and the statement
+ * "f += forceN[i]*dEdVN;".  The generated statements refer to the local names
+ * "f" and "i", so those names must not be changed here.
+ */
+__kernel void addForces(__global real4* forces, int numAtoms
+    PARAMETER_ARGUMENTS) {
+    for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
+        real4 f = forces[i];  // start from the force already stored for this entry
+        ADD_FORCES
+        forces[i] = f;  // write the accumulated total back
+    }
+}
\ No newline at end of file
--- a/platforms/opencl/src/kernels/utilities.cl
+++ b/platforms/opencl/src/kernels/utilities.cl
@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re
 }
 #endif
+/**
+ * Sum the energy buffer.  Each work-item first adds up the entries it visits when
+ * stepping through the buffer by the local size, and stores that partial sum in
+ * tempBuffer.  The partial sums are then combined pairwise with a doubling stride,
+ * and work-item 0 writes the total to *result.
+ */
+__kernel void reduceEnergy(__global const mixed* restrict energyBuffer, __global mixed* restrict result, int bufferSize, int workGroupSize, __local mixed* tempBuffer) {
+    const unsigned int localId = get_local_id(0);
+    // Per-work-item partial sum.
+    mixed partial = 0;
+    unsigned int position = localId;
+    while (position < bufferSize) {
+        partial += energyBuffer[position];
+        position += get_local_size(0);
+    }
+    tempBuffer[localId] = partial;
+    // Pairwise combination; the barrier makes the previous round's writes visible.
+    for (int stride = 1; stride < workGroupSize; stride *= 2) {
+        barrier(CLK_LOCAL_MEM_FENCE);
+        if (localId%(stride*2) == 0 && localId+stride < workGroupSize)
+            tempBuffer[localId] += tempBuffer[localId+stride];
+    }
+    if (localId == 0)
+        *result = tempBuffer[0];
+}
 /**
 * This is called to determine the accuracy of various native functions.
 */

--- a/platforms/opencl/tests/TestOpenCLCustomCVForce.cpp
+++ b/platforms/opencl/tests/TestOpenCLCustomCVForce.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2017 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+#include "OpenCLTests.h"
+#include "TestCustomCVForce.h"
+void runPlatformTests() {  // Intentionally empty: this file defines no OpenCL-specific tests. NOTE(review): the shared tests presumably come from TestCustomCVForce.h - confirm.
+}
--- a/platforms/opencl/tests/TestOpenCLFFT.cpp
+++ b/platforms/opencl/tests/TestOpenCLFFT.cpp
@@ -54,7 +54,7 @@ template <class Real2>
 void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
    System system;
    system.addParticle(0.0);
-    OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
+    OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
    OpenCLContext& context = *platformData.contexts[0];
    context.initialize();
    OpenMM_SFMT::SFMT sfmt;

--- a/platforms/opencl/tests/TestOpenCLRandom.cpp
+++ b/platforms/opencl/tests/TestOpenCLRandom.cpp
@@ -54,7 +54,7 @@ void testGaussian() {
    System system;
    for (int i = 0; i < numAtoms; i++)
        system.addParticle(1.0);
-    OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
+    OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
    OpenCLContext& context = *platformData.contexts[0];
    context.initialize();
    context.getIntegrationUtilities().initRandomNumberGenerator(0);

--- a/platforms/opencl/tests/TestOpenCLSort.cpp
+++ b/platforms/opencl/tests/TestOpenCLSort.cpp
@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
    System system;
    system.addParticle(0.0);
-    OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
+    OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1, NULL);
    OpenCLContext& context = *platformData.contexts[0];
    context.initialize();
    OpenCLArray data(context, array.size(), sizeof(float), "sortData");