Created (not yet fully debugged) OpenCL implementation of...

Created (not yet fully debugged) OpenCL implementation of VariableVerletIntegrator and VariableLangevinIntegrator.

Created (not yet fully debugged) OpenCL implementation of...
Created (not yet fully debugged) OpenCL implementation of VariableVerletIntegrator and VariableLangevinIntegrator.
3cf5495a · Peter Eastman · 50f8b9fa · 3cf5495a · 3cf5495a · 3cf5495a
Commit 3cf5495a authored Nov 06, 2009 by Peter Eastman
10 changed files
--- a/platforms/opencl/src/OpenCLContext.cpp
+++ b/platforms/opencl/src/OpenCLContext.cpp
@@ -60,7 +60,7 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
        if (deviceIndex == -1)
            throw OpenMMException("No compatible OpenCL device is available");
        device = devices[deviceIndex];
-        if (device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0] < minThreadBlockSize)
+        if (device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() < minThreadBlockSize)
            throw OpenMMException("The specified OpenCL device is not compatible with OpenMM");
        compilationOptions = "-cl-fast-relaxed-math";
        string vendor = device.getInfo<CL_DEVICE_VENDOR>();
@@ -182,12 +182,12 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
    return program;
 }

-void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int workUnitSize) {
-    if (workUnitSize == -1)
-        workUnitSize = ThreadBlockSize;
-    int size = std::min((workUnits+workUnitSize-1)/workUnitSize, numThreadBlocks)*workUnitSize;
+void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSize) {
+    if (blockSize == -1)
+        blockSize = ThreadBlockSize;
+    int size = std::min((workUnits+blockSize-1)/blockSize, numThreadBlocks)*blockSize;
    try {
-        queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(workUnitSize));
+        queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize));
    }
    catch (cl::Error err) {
        stringstream str;

--- a/platforms/opencl/src/OpenCLContext.h
+++ b/platforms/opencl/src/OpenCLContext.h
@@ -91,6 +91,12 @@ public:
    cl::Context& getContext() {
        return context;
    }
+    /**
+     * Get the cl::Device associated with this object.
+     */
+    cl::Device& getDevice() {
+        return device;
+    }
    /**
     * Get the cl::CommandQueue associated with this object.
     */
@@ -165,9 +171,9 @@ public:
     *
     * @param kernel       the kernel to execute
     * @param workUnits    the maximum number of work units that should be used
-     * @param workUnitSize the size of each work unit to use
+     * @param blockSize    the size of each thread block to use
     */
-    void executeKernel(cl::Kernel& kernel, int workUnits, int workUnitSize = -1);
+    void executeKernel(cl::Kernel& kernel, int workUnits, int blockSize = -1);
    /**
     * Set all elements of an array to 0.
     */

--- a/platforms/opencl/src/OpenCLKernelFactory.cpp
+++ b/platforms/opencl/src/OpenCLKernelFactory.cpp
@@ -58,10 +58,10 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
        return new OpenCLIntegrateLangevinStepKernel(name, platform, cl);
 //    if (name == IntegrateBrownianStepKernel::Name())
 //        return new OpenCLIntegrateBrownianStepKernel(name, platform, cl);
-//    if (name == IntegrateVariableVerletStepKernel::Name())
-//        return new OpenCLIntegrateVariableVerletStepKernel(name, platform, cl);
-//    if (name == IntegrateVariableLangevinStepKernel::Name())
-//        return new OpenCLIntegrateVariableLangevinStepKernel(name, platform, cl);
+    if (name == IntegrateVariableVerletStepKernel::Name())
+        return new OpenCLIntegrateVariableVerletStepKernel(name, platform, cl);
+    if (name == IntegrateVariableLangevinStepKernel::Name())
+        return new OpenCLIntegrateVariableLangevinStepKernel(name, platform, cl);
 //    if (name == ApplyAndersenThermostatKernel::Name())
 //        return new OpenCLApplyAndersenThermostatKernel(name, platform, cl);
    if (name == CalcKineticEnergyKernel::Name())

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -1171,6 +1171,8 @@ double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) {
 }

 OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
+    if (stepSize != NULL)
+        delete stepSize;
 }

 void OpenCLIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
@@ -1178,6 +1180,8 @@ void OpenCLIntegrateVerletStepKernel::initialize(const System& system, const Ver
    cl::Program program = cl.createProgram(cl.loadSourceFromFile("verlet.cl"));
    kernel1 = cl::Kernel(program, "integrateVerletPart1");
    kernel2 = cl::Kernel(program, "integrateVerletPart2");
+    stepSize = new OpenCLArray<mm_float2>(cl, 1, "stepSize");
+    prevStepSize = -1.0;
 }

 void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
@@ -1187,17 +1191,23 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
        kernel1.setArg<cl_int>(0, numAtoms);
-        kernel1.setArg<cl_float>(1, dt);
+        kernel1.setArg<cl::Buffer>(1, stepSize->getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(4, cl.getForce().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
        kernel2.setArg<cl_int>(0, numAtoms);
-        kernel2.setArg<cl_float>(1, dt);
+        kernel2.setArg<cl::Buffer>(1, stepSize->getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(4, integration.getPosDelta().getDeviceBuffer());
    }
+    if (dt != prevStepSize) {
+        vector<mm_float2> stepSizeVec(1);
+        stepSizeVec[0] = (mm_float2) {dt, dt};
+        stepSize->upload(stepSizeVec);
+        prevStepSize = dt;
+    }

    // Call the first integration kernel.

@@ -1282,16 +1292,14 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
        double EP = exp(GDT);
        double EM = exp(-GDT);
        double B, C, D;
-        if (GDT >= 0.1)
-        {
+        if (GDT >= 0.1) {
            double term1 = EPH - 1.0;
            term1 *= term1;
            B = GDT*(EP - 1.0) - 4.0*term1;
            C = GDT - 3.0 + 4.0*EMH - EM;
            D = 2.0 - EPH - EMH;
        }
-        else
-        {
+        else {
            double term1 = 0.5*GDT;
            double term2 = term1*term1;
            double term4 = term2*term2;
@@ -1403,88 +1411,186 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
 //    data.stepCount++;
 //}
 //
-//OpenCLIntegrateVariableVerletStepKernel::~OpenCLIntegrateVariableVerletStepKernel() {
-//}
-//
-//void OpenCLIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
-//    initializeIntegration(system, data, integrator);
-//    prevErrorTol = -1.0;
-//}
-//
-//void OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
-//    _gpuContext* gpu = data.gpu;
-//    double errorTol = integrator.getErrorTolerance();
-//    if (errorTol != prevErrorTol) {
-//        // Initialize the GPU parameters.
-//
-//        gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) errorTol);
-//        gpuSetConstants(gpu);
-//        prevErrorTol = errorTol;
-//    }
-//    float maxStepSize = (float)(maxTime-data.time);
-//    kSelectVerletStepSize(gpu, maxStepSize);
-//    kVerletUpdatePart1(gpu);
-//    kApplyFirstShake(gpu);
-//    kApplyFirstSettle(gpu);
-//    kApplyFirstCCMA(gpu);
-//    if (data.removeCM)
-//        if (data.stepCount%data.cmMotionFrequency == 0)
-//            gpu->bCalculateCM = true;
-//    kVerletUpdatePart2(gpu);
-//    gpu->psStepSize->Download();
-//    data.time += (*gpu->psStepSize)[0].y;
-//    if ((*gpu->psStepSize)[0].y == maxStepSize)
-//        data.time = maxTime; // Avoid round-off error
-//    data.stepCount++;
-//}
-//
-//OpenCLIntegrateVariableLangevinStepKernel::~OpenCLIntegrateVariableLangevinStepKernel() {
-//}
-//
-//void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
-//    initializeIntegration(system, data, integrator);
-//    _gpuContext* gpu = data.gpu;
-//    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
-//    gpuInitializeRandoms(gpu);
-//    prevErrorTol = -1.0;
-//}
-//
-//void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
-//    _gpuContext* gpu = data.gpu;
-//    double temperature = integrator.getTemperature();
-//    double friction = integrator.getFriction();
-//    double errorTol = integrator.getErrorTolerance();
-//    if (temperature != prevTemp || friction != prevFriction || errorTol != prevErrorTol) {
-//        // Initialize the GPU parameters.
-//
-//        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
-//        gpuSetLangevinIntegrationParameters(gpu, (float) tau, 0.0f, (float) temperature, errorTol);
-//        gpuSetConstants(gpu);
-//        kGenerateRandoms(gpu);
-//        prevTemp = temperature;
-//        prevFriction = friction;
-//        prevErrorTol = errorTol;
-//    }
-//    float maxStepSize = (float)(maxTime-data.time);
-//    kSelectLangevinStepSize(gpu, maxStepSize);
-//    kLangevinUpdatePart1(gpu);
-//    kApplyFirstShake(gpu);
-//    kApplyFirstSettle(gpu);
-//    kApplyFirstCCMA(gpu);
-//    if (data.removeCM)
-//        if (data.stepCount%data.cmMotionFrequency == 0)
-//            gpu->bCalculateCM = true;
-//    kLangevinUpdatePart2(gpu);
-//    kApplySecondShake(gpu);
-//    kApplySecondSettle(gpu);
-//    kApplySecondCCMA(gpu);
-//    gpu->psStepSize->Download();
-//    data.time += (*gpu->psStepSize)[0].y;
-//    if ((*gpu->psStepSize)[0].y == maxStepSize)
-//        data.time = maxTime; // Avoid round-off error
-//    data.stepCount++;
-//}
-//
+
+OpenCLIntegrateVariableVerletStepKernel::~OpenCLIntegrateVariableVerletStepKernel() {
+    if (stepSize != NULL)
+        delete stepSize;
+}
+
+void OpenCLIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
+    cl.initialize(system);
+    cl::Program program = cl.createProgram(cl.loadSourceFromFile("verlet.cl"));
+    kernel1 = cl::Kernel(program, "integrateVerletPart1");
+    kernel2 = cl::Kernel(program, "integrateVerletPart2");
+    selectSizeKernel = cl::Kernel(program, "selectVerletStepSize");
+    stepSize = new OpenCLArray<mm_float2>(cl, 1, "stepSize", true);
+    stepSize->set(0, (mm_float2) {0.0f, 0.0f});
+    stepSize->upload();
+    blockSize = std::min(std::min(256, system.getNumParticles()), (int) cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
+}
+
+void OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilties();
+    int numAtoms = cl.getNumAtoms();
+    if (!hasInitializedKernels) {
+        hasInitializedKernels = true;
+        kernel1.setArg<cl_int>(0, numAtoms);
+        kernel1.setArg<cl::Buffer>(1, stepSize->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(4, cl.getForce().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
+        kernel2.setArg<cl_int>(0, numAtoms);
+        kernel2.setArg<cl::Buffer>(1, stepSize->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(4, integration.getPosDelta().getDeviceBuffer());
+        selectSizeKernel.setArg<cl_int>(0, numAtoms);
+        selectSizeKernel.setArg<cl::Buffer>(3, stepSize->getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(5, cl.getForce().getDeviceBuffer());
+        selectSizeKernel.setArg(6, blockSize*sizeof(cl_float), NULL);
+    }
+
+    // Select the step size to use.
+
+    float maxStepSize = (float)(maxTime-cl.getTime());
+    selectSizeKernel.setArg<cl_float>(1, maxStepSize);
+    selectSizeKernel.setArg<cl_float>(2, integrator.getErrorTolerance());
+    cl.executeKernel(selectSizeKernel, blockSize, blockSize);
+
+    // Call the first integration kernel.
+
+    cl.executeKernel(kernel1, numAtoms);
+
+    // Apply constraints.
+
+    integration.applyConstraints(integrator.getConstraintTolerance());
+
+    // Call the second integration kernel.
+
+    cl.executeKernel(kernel2, numAtoms);
+
+    // Update the time and step count.
+
+    stepSize->download();
+    double dt = stepSize->get(0).y;
+    double time = cl.getTime()+dt;
+    if (dt == maxStepSize)
+        time = maxTime; // Avoid round-off error
+    cl.setTime(time);
+    cl.setStepCount(cl.getStepCount()+1);
+}
+
+OpenCLIntegrateVariableLangevinStepKernel::~OpenCLIntegrateVariableLangevinStepKernel() {
+    if (params != NULL)
+        delete params;
+    if (xVector != NULL)
+        delete xVector;
+    if (vVector != NULL)
+        delete vVector;
+    if (stepSize != NULL)
+        delete stepSize;
+}
+
+void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
+    cl.initialize(system);
+    cl.getIntegrationUtilties().initRandomNumberGenerator(integrator.getRandomNumberSeed());
+    cl::Program program = cl.createProgram(cl.loadSourceFromFile("langevin.cl"));
+    kernel1 = cl::Kernel(program, "integrateLangevinPart1");
+    kernel2 = cl::Kernel(program, "integrateLangevinPart2");
+    kernel3 = cl::Kernel(program, "integrateLangevinPart3");
+    selectSizeKernel = cl::Kernel(program, "selectLangevinStepSize");
+    params = new OpenCLArray<cl_float>(cl, 11, "langevinParams");
+    xVector = new OpenCLArray<mm_float4>(cl, cl.getPaddedNumAtoms(), "xVector");
+    vVector = new OpenCLArray<mm_float4>(cl, cl.getPaddedNumAtoms(), "vVector");
+    vector<mm_float4> initialXVector(xVector->getSize(), (mm_float4) {0.0f, 0.0f, 0.0f, 0.0f});
+    xVector->upload(initialXVector);
+    stepSize = new OpenCLArray<mm_float2>(cl, 1, "stepSize", true);
+    stepSize->set(0, (mm_float2) {0.0f, 0.0f});
+    stepSize->upload();
+    blockSize = std::min(256, system.getNumParticles());
+    blockSize = std::max(blockSize, params->getSize());
+    blockSize = std::min(blockSize, (int) cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
+}
+
+void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilties();
+    int numAtoms = cl.getNumAtoms();
+    if (!hasInitializedKernels) {
+        hasInitializedKernels = true;
+        kernel1.setArg<cl_int>(0, numAtoms);
+        kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(2, cl.getForce().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(3, integration.getPosDelta().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(4, params->getDeviceBuffer());
+        kernel1.setArg(5, params->getSize()*sizeof(cl_float), NULL);
+        kernel1.setArg<cl::Buffer>(6, xVector->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(7, vVector->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(8,integration.getRandom().getDeviceBuffer());
+        kernel2.setArg<cl_int>(0, numAtoms);
+        kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(3, params->getDeviceBuffer());
+        kernel2.setArg(4, params->getSize()*sizeof(cl_float), NULL);
+        kernel2.setArg<cl::Buffer>(5, xVector->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(6, vVector->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
+        kernel3.setArg<cl_int>(0, numAtoms);
+        kernel3.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
+        kernel3.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
+        selectSizeKernel.setArg<cl_int>(0, numAtoms);
+        selectSizeKernel.setArg<cl::Buffer>(5, stepSize->getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(6, cl.getVelm().getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(7, cl.getForce().getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(8, params->getDeviceBuffer());
+        selectSizeKernel.setArg(9, params->getSize()*sizeof(cl_float), NULL);
+        selectSizeKernel.setArg(10, blockSize*sizeof(cl_float), NULL);
+    }
+
+    // Select the step size to use.
+
+    float maxStepSize = (float)(maxTime-cl.getTime());
+    selectSizeKernel.setArg<cl_float>(1, maxStepSize);
+    selectSizeKernel.setArg<cl_float>(2, (cl_float) integrator.getErrorTolerance());
+    selectSizeKernel.setArg<cl_float>(3, (cl_float) (integrator.getFriction() == 0.0 ? 0.0 : 1.0/integrator.getFriction()));
+    selectSizeKernel.setArg<cl_float>(4, (cl_float) (BOLTZ*integrator.getTemperature()));
+    cl.executeKernel(selectSizeKernel, blockSize, blockSize);
+
+    // Call the first integration kernel.
+
+    int numThreads = cl.getNumThreadBlocks()*cl.ThreadBlockSize;
+    kernel1.setArg<cl_uint>(9, integration.prepareRandomNumbers(2*numThreads));
+    cl.executeKernel(kernel1, numAtoms);
+
+    // Apply constraints.
+
+    integration.applyConstraints(integrator.getConstraintTolerance());
+
+    // Call the second integration kernel.
+
+    kernel2.setArg<cl_uint>(8, integration.prepareRandomNumbers(2*numThreads));
+    cl.executeKernel(kernel2, numAtoms);
+
+    // Reapply constraints.
+
+    integration.applyConstraints(integrator.getConstraintTolerance());
+
+    // Call the third integration kernel.
+
+    cl.executeKernel(kernel3, numAtoms);
+
+    // Update the time and step count.
+
+    stepSize->download();
+    double dt = stepSize->get(0).y;
+    double time = cl.getTime()+dt;
+    if (dt == maxStepSize)
+        time = maxTime; // Avoid round-off error
+    cl.setTime(time);
+    cl.setStepCount(cl.getStepCount()+1);
+}
+
 //OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
 //}
 //

--- a/platforms/opencl/src/OpenCLKernels.h
+++ b/platforms/opencl/src/OpenCLKernels.h
@@ -435,7 +435,8 @@ private:
 */
 class OpenCLIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
 public:
-    OpenCLIntegrateVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVerletStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
+    OpenCLIntegrateVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVerletStepKernel(name, platform), cl(cl),
+            hasInitializedKernels(false), stepSize(NULL) {
    }
    ~OpenCLIntegrateVerletStepKernel();
    /**
@@ -454,7 +455,9 @@ public:
    void execute(ContextImpl& context, const VerletIntegrator& integrator);
 private:
    OpenCLContext& cl;
+    double prevStepSize;
    bool hasInitializedKernels;
+    OpenCLArray<mm_float2>* stepSize;
    cl::Kernel kernel1, kernel2;
 };

@@ -464,7 +467,7 @@ private:
 class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
 public:
    OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl),
-            params(NULL), xVector(NULL), vVector(NULL), hasInitializedKernels(false) {
+            hasInitializedKernels(false), params(NULL), xVector(NULL), vVector(NULL) {
    }
    ~OpenCLIntegrateLangevinStepKernel();
    /**
@@ -517,62 +520,74 @@ private:
 //    OpenCLContext& cl;
 //    double prevTemp, prevFriction, prevStepSize;
 //};
-//
-///**
-// * This kernel is invoked by VariableVerletIntegrator to take one time step.
-// */
-//class OpenCLIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
-//public:
-//    OpenCLIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableVerletStepKernel(name, platform), cl(cl) {
-//    }
-//    ~OpenCLIntegrateVariableVerletStepKernel();
-//    /**
-//     * Initialize the kernel.
-//     *
-//     * @param system     the System this kernel will be applied to
-//     * @param integrator the VerletIntegrator this kernel will be used for
-//     */
-//    void initialize(const System& system, const VariableVerletIntegrator& integrator);
-//    /**
-//     * Execute the kernel.
-//     *
-//     * @param context    the context in which to execute this kernel
-//     * @param integrator the VerletIntegrator this kernel is being used for
-//     * @param maxTime    the maximum time beyond which the simulation should not be advanced
-//     */
-//    void execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime);
-//private:
-//    OpenCLContext& cl;
-//    double prevErrorTol;
-//};
-//
-///**
-// * This kernel is invoked by VariableLangevinIntegrator to take one time step.
-// */
-//class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
-//public:
-//    OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl) {
-//    }
-//    ~OpenCLIntegrateVariableLangevinStepKernel();
-//    /**
-//     * Initialize the kernel, setting up the particle masses.
-//     *
-//     * @param system     the System this kernel will be applied to
-//     * @param integrator the VariableLangevinIntegrator this kernel will be used for
-//     */
-//    void initialize(const System& system, const VariableLangevinIntegrator& integrator);
-//    /**
-//     * Execute the kernel.
-//     *
-//     * @param context    the context in which to execute this kernel
-//     * @param integrator the VariableLangevinIntegrator this kernel is being used for
-//     * @param maxTime    the maximum time beyond which the simulation should not be advanced
-//     */
-//    void execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime);
-//private:
-//    OpenCLContext& cl;
-//    double prevTemp, prevFriction, prevErrorTol;
-//};
+
+/**
+ * This kernel is invoked by VariableVerletIntegrator to take one time step.
+ */
+class OpenCLIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
+public:
+    OpenCLIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableVerletStepKernel(name, platform), cl(cl),
+            hasInitializedKernels(false), stepSize(NULL) {
+    }
+    ~OpenCLIntegrateVariableVerletStepKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param integrator the VerletIntegrator this kernel will be used for
+     */
+    void initialize(const System& system, const VariableVerletIntegrator& integrator);
+    /**
+     * Execute the kernel.
+     *
+     * @param context    the context in which to execute this kernel
+     * @param integrator the VerletIntegrator this kernel is being used for
+     * @param maxTime    the maximum time beyond which the simulation should not be advanced
+     */
+    void execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime);
+private:
+    OpenCLContext& cl;
+    bool hasInitializedKernels;
+    int blockSize;
+    OpenCLArray<mm_float2>* stepSize;
+    cl::Kernel kernel1, kernel2, selectSizeKernel;
+};
+
+/**
+ * This kernel is invoked by VariableLangevinIntegrator to take one time step.
+ */
+class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
+public:
+    OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl),
+            hasInitializedKernels(false), stepSize(NULL) {
+    }
+    ~OpenCLIntegrateVariableLangevinStepKernel();
+    /**
+     * Initialize the kernel, setting up the particle masses.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param integrator the VariableLangevinIntegrator this kernel will be used for
+     */
+    void initialize(const System& system, const VariableLangevinIntegrator& integrator);
+    /**
+     * Execute the kernel.
+     *
+     * @param context    the context in which to execute this kernel
+     * @param integrator the VariableLangevinIntegrator this kernel is being used for
+     * @param maxTime    the maximum time beyond which the simulation should not be advanced
+     */
+    void execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime);
+private:
+    OpenCLContext& cl;
+    bool hasInitializedKernels;
+    int blockSize;
+    OpenCLArray<cl_float>* params;
+    OpenCLArray<mm_float4>* xVector;
+    OpenCLArray<mm_float4>* vVector;
+    OpenCLArray<mm_float2>* stepSize;
+    cl::Kernel kernel1, kernel2, kernel3, selectSizeKernel;
+    double prevTemp, prevFriction, prevErrorTol;
+};
 //
 ///**
 // * This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.

--- a/platforms/opencl/src/OpenCLPlatform.cpp
+++ b/platforms/opencl/src/OpenCLPlatform.cpp
@@ -57,8 +57,8 @@ OpenCLPlatform::OpenCLPlatform() {
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
 //    registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
-//    registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
-//    registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
+    registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
+    registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
 //    registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
    registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
    registerKernelFactory(RemoveCMMotionKernel::Name(), factory);

--- a/platforms/opencl/src/kernels/langevin.cl
+++ b/platforms/opencl/src/kernels/langevin.cl
@@ -75,3 +75,96 @@ __kernel void integrateLangevinPart3(int numAtoms, __global float4* posq, __glob
        index += get_global_size(0);
    }
 }
+
+/**
+ * Select the step size to use for the next step.
+ */
+
+__kernel void selectLangevinStepSize(int numAtoms, float maxStepSize, float errorTol, float tau, float kT, __global float2* dt,
+        __global float4* velm, __global float4* force, __global float* paramBuffer, __local float* params, __local float* error) {
+    // Calculate the error.
+
+    float err = 0.0f;
+    unsigned int index = get_local_id(0);
+    while (index < numAtoms) {
+        float4 force = force[index];
+        float invMass = velm[index].w;
+        err += (force.x*force.x + force.y*force.y + force.z*force.z)*invMass;
+        index += get_global_size(0);
+    }
+    error[get_local_id(0)] = err;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Sum the errors from all threads.
+
+    for (int offset = 1; offset < get_local_size(0); offset *= 2) {
+        if (get_local_id(0)+offset < get_local_size(0) && (get_local_id(0)&(2*offset-1)) == 0)
+            error[get_local_id(0)] += error[get_local_id(0)+offset];
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if (get_global_id(0) == 0) {
+        // Select the new step size.
+
+        float totalError = sqrt(error[0]/(numAtoms*3));
+        float newStepSize = sqrt(errorTol/totalError);
+        float oldStepSize = dt[0].y;
+        if (oldStepSize > 0.0f)
+            newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
+        if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
+            newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
+        if (newStepSize > maxStepSize)
+            newStepSize = maxStepSize;
+        dt[0].y = newStepSize;
+
+        // Recalculate the integration parameters.
+
+        float gdt                  = newStepSize/tau;
+        float eph                  = exp(0.5f*gdt);
+        float emh                  = exp(-0.5f*gdt);
+        float ep                   = exp(gdt);
+        float em                   = exp(-gdt);
+        float em_v                 = exp(-0.5f*(oldStepSize+newStepSize)/tau);
+        float b, c, d;
+        if (gdt >= 0.1f) {
+            float term1 = eph-1.0f;
+            term1                 *= term1;
+            b                      = gdt*(ep-1.0f) - 4.0f*term1;
+            c                      = gdt - 3.0f + 4.0f*emh - em;
+            d                      = 2.0f - eph - emh;
+        }
+        else {
+            float term1            = 0.5f*gdt;
+            float term2            = term1*term1;
+            float term4            = term2*term2;
+
+            float third            = 1.0f/3.0f;
+            float o7_9             = 7.0f/9.0f;
+            float o1_12            = 1.0f/12.0f;
+            float o17_90           = 17.0f/90.0f;
+            float o7_30            = 7.0f/30.0f;
+            float o31_1260         = 31.0f/1260.0f;
+            float o_360            = 1.0f/360.0f;
+
+            b                      = term4*(third + term1*(third + term1*(o17_90 + term1*o7_9)));
+            c                      = term2*term1*(2.0f*third + term1*(-0.5f + term1*(o7_30 + term1*(-o1_12 + term1*o31_1260))));
+            d                      = term2*(-1.0f + term2*(-o1_12 - term2*o_360));
+        }
+        float fix1                 = tau*(eph - emh);
+        if (fix1 == 0.0f)
+            fix1 = newStepSize;
+        params[EM]                 = em;
+        params[EM_V]               = em_v;
+        params[DOverTauC]          = d/(tau*c);
+        params[TauOneMinusEM_V]    = tau*(1.0f-em_v);
+        params[TauDOverEMMinusOne] = tau*d/(em - 1.0f);
+        params[Fix1]               = fix1;
+        params[OneOverFix1]        = 1.0f/fix1;
+        params[V]                  = sqrt(kT*(1.0f - em));
+        params[X]                  = tau*sqrt(kT*c);
+        params[Yv]                 = sqrt(kT*b/c);
+        params[Yx]                 = tau*sqrt(kT*b/(1.0f - em));
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if (get_local_id(0) < MaxParams)
+        paramBuffer[get_local_id(0)] = params[get_local_id(0)];
+}
--- a/platforms/opencl/src/kernels/verlet.cl
+++ b/platforms/opencl/src/kernels/verlet.cl
@@ -2,13 +2,21 @@
 * Perform the first step of verlet integration.
 */

-__kernel void integrateVerletPart1(int numAtoms, float dt, __global float4* posq, __global float4* velm, __global float4* force, __global float4* posDelta) {
+__kernel void integrateVerletPart1(int numAtoms, __global float2* dt, __global float4* posq, __global float4* velm, __global float4* force, __global float4* posDelta) {
+    __local float dtPos;
+    __local float dtVel;
+    if (get_local_id(0) == 0) {
+        float2 stepSize = dt[0];
+        dtPos = stepSize.y;
+        dtVel = 0.5f*(stepSize.x+stepSize.y);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
    int index = get_global_id(0);
    while (index < numAtoms) {
        float4 pos = posq[index];
        float4 velocity = velm[index];
-        velocity.xyz += force[index].xyz*dt*velocity.w;
-        pos.xyz = velocity.xyz*dt;
+        velocity.xyz += force[index].xyz*dtVel*velocity.w;
+        pos.xyz = velocity.xyz*dtPos;
        posDelta[index] = pos;
        velm[index] = velocity;
        index += get_global_size(0);
@@ -19,9 +27,16 @@ __kernel void integrateVerletPart1(int numAtoms, float dt, __global float4* posq
 * Perform the second step of verlet integration.
 */

-__kernel void integrateVerletPart2(int numAtoms, float dt, __global float4* posq, __global float4* velm, __global float4* posDelta) {
+__kernel void integrateVerletPart2(int numAtoms, __global float2* dt, __global float4* posq, __global float4* velm, __global float4* posDelta) {
+    __local float oneOverDt;
+    if (get_local_id(0) == 0) {
+        float2 stepSize = dt[0];
+        oneOverDt = 1.0f/stepSize.y;
+        if (get_global_id(0) == 0)
+            dt[0].x = stepSize.y;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
    int index = get_global_id(0);
-    float oneOverDt = 1.0f/dt;
    while (index < numAtoms) {
        float4 pos = posq[index];
        float4 delta = posDelta[index];
@@ -33,3 +48,42 @@ __kernel void integrateVerletPart2(int numAtoms, float dt, __global float4* posq
        index += get_global_size(0);
    }
 }
+
+/**
+ * Select the step size to use for the next step.
+ */
+
+__kernel void selectVerletStepSize(int numAtoms, float maxStepSize, float errorTol, __global float2* dt, __global float4* velm, __global float4* force, __local float* error) {
+    // Calculate the error.
+
+    float err = 0.0f;
+    unsigned int index = get_local_id(0);
+    while (index < numAtoms) {
+        float4 force = force[index];
+        float invMass = velm[index].w;
+        err += (force.x*force.x + force.y*force.y + force.z*force.z)*invMass;
+        index += get_global_size(0);
+    }
+    error[get_local_id(0)] = err;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Sum the errors from all threads.
+
+    for (int offset = 1; offset < get_local_size(0); offset *= 2) {
+        if (get_local_id(0)+offset < get_local_size(0) && (get_local_id(0)&(2*offset-1)) == 0)
+            error[get_local_id(0)] += error[get_local_id(0)+offset];
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    if (get_local_id(0) == 0) {
+        float totalError = sqrt(error[0]/(numAtoms*3));
+        float newStepSize = sqrt(errorTol/totalError);
+        float oldStepSize = dt[0].y;
+        if (oldStepSize > 0.0f)
+            newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
+        if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
+            newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
+        if (newStepSize > maxStepSize)
+            newStepSize = maxStepSize;
+        dt[0].y = newStepSize;
+    }
+}
--- a/platforms/opencl/tests/TestOpenCLVariableLangevinIntegrator.cpp
+++ b/platforms/opencl/tests/TestOpenCLVariableLangevinIntegrator.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+/**
+ * This tests the OpenCL implementation of VariableLangevinIntegrator.
+ */
+
+#include "../../../tests/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "OpenCLPlatform.h"
+#include "openmm/HarmonicBondForce.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/System.h"
+#include "openmm/VariableLangevinIntegrator.h"
+#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+#include "../src/sfmt/SFMT.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+const double TOL = 1e-5;
+
+void testSingleBond() {
+    OpenCLPlatform platform;
+    System system;
+    system.addParticle(2.0);
+    system.addParticle(2.0);
+    VariableLangevinIntegrator integrator(0, 0.1, 1e-6);
+    HarmonicBondForce* forceField = new HarmonicBondForce();
+    forceField->addBond(0, 1, 1.5, 1);
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(2);
+    positions[0] = Vec3(-1, 0, 0);
+    positions[1] = Vec3(1, 0, 0);
+    context.setPositions(positions);
+
+    // This is simply a damped harmonic oscillator, so compare it to the analytical solution.
+
+    double freq = std::sqrt(1-0.05*0.05);
+    for (int i = 0; i < 1000; ++i) {
+        State state = context.getState(State::Positions | State::Velocities);
+        double time = state.getTime();
+        double expectedDist = 1.5+0.5*std::exp(-0.05*time)*std::cos(freq*time);
+        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
+        ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
+        double expectedSpeed = -0.5*std::exp(-0.05*time)*(0.05*std::cos(freq*time)+freq*std::sin(freq*time));
+        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.02);
+        ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.02);
+        integrator.step(1);
+    }
+
+    // Now set the friction to a tiny value and see if it conserves energy.
+
+    integrator.setFriction(5e-5);
+    context.setPositions(positions);
+    State state = context.getState(State::Energy);
+    double initialEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
+    for (int i = 0; i < 1000; ++i) {
+        state = context.getState(State::Energy);
+        double energy = state.getKineticEnergy()+state.getPotentialEnergy();
+        ASSERT_EQUAL_TOL(initialEnergy, energy, 0.05);
+        integrator.step(1);
+    }
+}
+
+void testTemperature() {
+    const int numParticles = 8;
+    const double temp = 100.0;
+    OpenCLPlatform platform;
+    System system;
+    VariableLangevinIntegrator integrator(temp, 2.0, 1e-4);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(2.0);
+        forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
+    }
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(numParticles);
+    for (int i = 0; i < numParticles; ++i)
+        positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
+    context.setPositions(positions);
+
+    // Let it equilibrate.
+
+    integrator.step(10000);
+
+    // Now run it for a while and see if the temperature is correct.
+
+    double ke = 0.0;
+    for (int i = 0; i < 1000; ++i) {
+        State state = context.getState(State::Energy);
+        ke += state.getKineticEnergy();
+        integrator.step(1);
+    }
+    ke /= 1000;
+    double expected = 0.5*numParticles*3*BOLTZ*temp;
+    ASSERT_EQUAL_TOL(expected, ke, 3*expected/std::sqrt(1000.0));
+}
+
+void testConstraints() {
+    const int numParticles = 8;
+    const double temp = 100.0;
+    OpenCLPlatform platform;
+    System system;
+    VariableLangevinIntegrator integrator(temp, 2.0, 1e-5);
+    integrator.setConstraintTolerance(1e-5);
+    integrator.setRandomNumberSeed(0);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(10.0);
+        forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
+    }
+    for (int i = 0; i < numParticles-1; ++i)
+        system.addConstraint(i, i+1, 1.0);
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(numParticles);
+    vector<Vec3> velocities(numParticles);
+    init_gen_rand(0);
+    for (int i = 0; i < numParticles; ++i) {
+        positions[i] = Vec3(i/2, (i+1)/2, 0);
+        velocities[i] = Vec3(genrand_real2()-0.5, genrand_real2()-0.5, genrand_real2()-0.5);
+    }
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+
+    // Simulate it and see whether the constraints remain satisfied.
+
+    for (int i = 0; i < 1000; ++i) {
+        State state = context.getState(State::Positions);
+        for (int j = 0; j < numParticles-1; ++j) {
+            Vec3 p1 = state.getPositions()[j];
+            Vec3 p2 = state.getPositions()[j+1];
+            double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
+            ASSERT_EQUAL_TOL(1.0, dist, 2e-5);
+        }
+        integrator.step(1);
+    }
+}
+
+void testRandomSeed() {
+    const int numParticles = 8;
+    const double temp = 100.0;
+    OpenCLPlatform platform;
+    System system;
+    VariableLangevinIntegrator integrator(temp, 2.0, 1e-5);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(2.0);
+        forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
+    }
+    system.addForce(forceField);
+    vector<Vec3> positions(numParticles);
+    vector<Vec3> velocities(numParticles);
+    for (int i = 0; i < numParticles; ++i) {
+        positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
+        velocities[i] = Vec3(0, 0, 0);
+    }
+
+    // Try twice with the same random seed.
+
+    integrator.setRandomNumberSeed(5);
+    Context context(system, integrator, platform);
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state1 = context.getState(State::Positions);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state2 = context.getState(State::Positions);
+
+    // Try twice with a different random seed.
+
+    integrator.setRandomNumberSeed(10);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state3 = context.getState(State::Positions);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state4 = context.getState(State::Positions);
+
+    // Compare the results.
+
+    for (int i = 0; i < numParticles; i++) {
+        for (int j = 0; j < 3; j++) {
+            ASSERT(state1.getPositions()[i][j] == state2.getPositions()[i][j]);
+            ASSERT(state3.getPositions()[i][j] == state4.getPositions()[i][j]);
+            ASSERT(state1.getPositions()[i][j] != state3.getPositions()[i][j]);
+        }
+    }
+}
+
+int main() {
+    try {
+        testSingleBond();
+        testTemperature();
+//        testConstraints();
+        testRandomSeed();
+    }
+    catch(const exception& e) {
+        cout << "exception: " << e.what() << endl;
+        return 1;
+    }
+    cout << "Done" << endl;
+    return 0;
+}
+
+
--- a/platforms/opencl/tests/TestOpenCLVariableVerletIntegrator.cpp
+++ b/platforms/opencl/tests/TestOpenCLVariableVerletIntegrator.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+/**
+ * This tests the OpenCL implementation of VariableVerletIntegrator.
+ */
+
+#include "../../../tests/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "OpenCLPlatform.h"
+#include "openmm/HarmonicBondForce.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/System.h"
+#include "openmm/VariableVerletIntegrator.h"
+#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+#include "../src/sfmt/SFMT.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+const double TOL = 1e-5;
+
+void testSingleBond() {
+    OpenCLPlatform platform;
+    System system;
+    system.addParticle(2.0);
+    system.addParticle(2.0);
+    VariableVerletIntegrator integrator(1e-6);
+    HarmonicBondForce* forceField = new HarmonicBondForce();
+    forceField->addBond(0, 1, 1.5, 1);
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(2);
+    positions[0] = Vec3(-1, 0, 0);
+    positions[1] = Vec3(1, 0, 0);
+    context.setPositions(positions);
+
+    // This is simply a harmonic oscillator, so compare it to the analytical solution.
+
+    const double freq = 1.0;;
+    State state = context.getState(State::Energy);
+    const double initialEnergy = state.getKineticEnergy()+state.getPotentialEnergy();
+    for (int i = 0; i < 1000; ++i) {
+        state = context.getState(State::Positions | State::Velocities | State::Energy);
+        double time = state.getTime();
+        double expectedDist = 1.5+0.5*std::cos(freq*time);
+        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
+        ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
+        double expectedSpeed = -0.5*freq*std::sin(freq*time);
+        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.02);
+        ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.02);
+        double energy = state.getKineticEnergy()+state.getPotentialEnergy();
+        ASSERT_EQUAL_TOL(initialEnergy, energy, 0.05);
+        integrator.step(1);
+    }
+}
+
+void testConstraints() {
+    const int numParticles = 8;
+    const int numConstraints = 5;
+    const double temp = 100.0;
+    OpenCLPlatform platform;
+    System system;
+    VariableVerletIntegrator integrator(1e-5);
+    integrator.setConstraintTolerance(1e-5);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(10.0);
+        forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
+    }
+    system.addConstraint(0, 1, 1.0);
+    system.addConstraint(1, 2, 1.0);
+    system.addConstraint(2, 3, 1.0);
+    system.addConstraint(4, 5, 1.0);
+    system.addConstraint(6, 7, 1.0);
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(numParticles);
+    vector<Vec3> velocities(numParticles);
+    init_gen_rand(0);
+    for (int i = 0; i < numParticles; ++i) {
+        positions[i] = Vec3(i/2, (i+1)/2, 0);
+        velocities[i] = Vec3(genrand_real2()-0.5, genrand_real2()-0.5, genrand_real2()-0.5);
+    }
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+
+    // Simulate it and see whether the constraints remain satisfied.
+
+    double initialEnergy = 0.0;
+    for (int i = 0; i < 1000; ++i) {
+        State state = context.getState(State::Positions | State::Energy);
+            for (int j = 0; j < numConstraints; ++j) {
+                int particle1, particle2;
+                double distance;
+                system.getConstraintParameters(j, particle1, particle2, distance);
+                Vec3 p1 = state.getPositions()[particle1];
+                Vec3 p2 = state.getPositions()[particle2];
+                double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
+                ASSERT_EQUAL_TOL(distance, dist, 1e-4);
+            }
+        double energy = state.getKineticEnergy()+state.getPotentialEnergy();
+        if (i == 1)
+            initialEnergy = energy;
+        else if (i > 1)
+            ASSERT_EQUAL_TOL(initialEnergy, energy, 0.1);
+        integrator.step(1);
+    }
+    double finalTime = context.getState(State::Positions).getTime();
+    ASSERT(finalTime > 0.1);
+
+    // Now try the stepTo() method.
+
+    finalTime += 0.5;
+    integrator.stepTo(finalTime);
+    ASSERT_EQUAL(finalTime, context.getState(State::Positions).getTime());
+}
+
+void testConstrainedClusters() {
+    const int numParticles = 7;
+    const double temp = 500.0;
+    OpenCLPlatform platform;
+    System system;
+    VariableVerletIntegrator integrator(1e-5);
+    integrator.setConstraintTolerance(1e-5);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(i > 1 ? 1.0 : 10.0);
+        forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
+    }
+    system.addConstraint(0, 1, 1.0);
+    system.addConstraint(0, 2, 1.0);
+    system.addConstraint(0, 3, 1.0);
+    system.addConstraint(0, 4, 1.0);
+    system.addConstraint(1, 5, 1.0);
+    system.addConstraint(1, 6, 1.0);
+    system.addConstraint(2, 3, sqrt(2.0));
+    system.addConstraint(2, 4, sqrt(2.0));
+    system.addConstraint(3, 4, sqrt(2.0));
+    system.addConstraint(5, 6, sqrt(2.0));
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(numParticles);
+    positions[0] = Vec3(0, 0, 0);
+    positions[1] = Vec3(1, 0, 0);
+    positions[2] = Vec3(-1, 0, 0);
+    positions[3] = Vec3(0, 1, 0);
+    positions[4] = Vec3(0, 0, 1);
+    positions[5] = Vec3(2, 0, 0);
+    positions[6] = Vec3(1, 1, 0);
+    vector<Vec3> velocities(numParticles);
+    init_gen_rand(0);
+    for (int i = 0; i < numParticles; ++i)
+        velocities[i] = Vec3(genrand_real2()-0.5, genrand_real2()-0.5, genrand_real2()-0.5);
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+
+    // Simulate it and see whether the constraints remain satisfied.
+
+    double initialEnergy = 0.0;
+    for (int i = 0; i < 1000; ++i) {
+        State state = context.getState(State::Positions | State::Energy);
+        for (int j = 0; j < system.getNumConstraints(); ++j) {
+            int particle1, particle2;
+            double distance;
+            system.getConstraintParameters(j, particle1, particle2, distance);
+            Vec3 p1 = state.getPositions()[particle1];
+            Vec3 p2 = state.getPositions()[particle2];
+            double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
+            ASSERT_EQUAL_TOL(distance, dist, 2e-5);
+        }
+        double energy = state.getKineticEnergy()+state.getPotentialEnergy();
+        if (i == 1)
+            initialEnergy = energy;
+        else if (i > 1)
+            ASSERT_EQUAL_TOL(initialEnergy, energy, 0.05);
+        integrator.step(1);
+    }
+    ASSERT(context.getState(State::Positions).getTime() > 0.1);
+}
+
+int main() {
+    try {
+        testSingleBond();
+//        testConstraints();
+//        testConstrainedClusters();
+    }
+    catch(const exception& e) {
+        cout << "exception: " << e.what() << endl;
+        return 1;
+    }
+    cout << "Done" << endl;
+    return 0;
+}
+
+