Created OpenCL implementations of AndersenThermostat and BrownianIntegrator. Also fixed some bugs.

cc5467e7 · Peter Eastman · 13d89753 · cc5467e7 · cc5467e7 · cc5467e7
Commit cc5467e7 authored Nov 06, 2009 by Peter Eastman
13 changed files
--- a/platforms/opencl/src/OpenCLContext.h
+++ b/platforms/opencl/src/OpenCLContext.h
@@ -266,7 +266,7 @@ public:
    /**
     * Get the OpenCLIntegrationUtilities for this context.
     */
-    OpenCLIntegrationUtilities& getIntegrationUtilties() {
+    OpenCLIntegrationUtilities& getIntegrationUtilities() {
        return *integration;
    }
    /**

--- a/platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+++ b/platforms/opencl/src/OpenCLIntegrationUtilities.cpp
@@ -68,10 +68,13 @@ struct OpenCLIntegrationUtilities::ShakeCluster {

 OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, const System& system) : context(context),
        posDelta(NULL), settleAtoms(NULL), settleParams(NULL), shakeAtoms(NULL), shakeParams(NULL),
-        random(NULL), randomSeed(NULL), randomPos(NULL) {
+        random(NULL), randomSeed(NULL), randomPos(NULL), stepSize(NULL) {
    // Create workspace arrays.

    posDelta = new OpenCLArray<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
+    stepSize = new OpenCLArray<mm_float2>(context, 1, "stepSize", true);
+    stepSize->set(0, (mm_float2) {0.0f, 0.0f});
+    stepSize->upload();

    // Create kernels for enforcing constraints.

@@ -260,6 +263,8 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
        delete random;
    if (randomSeed != NULL)
        delete randomSeed;
+    if (stepSize != NULL)
+        delete stepSize;
 }

 void OpenCLIntegrationUtilities::applyConstraints(double tol) {

--- a/platforms/opencl/src/OpenCLIntegrationUtilities.h
+++ b/platforms/opencl/src/OpenCLIntegrationUtilities.h
@@ -53,6 +53,12 @@ public:
    OpenCLArray<mm_float4>& getRandom() {
        return *random;
    }
+    /**
+     * Get the array which contains the current step size.
+     */
+    OpenCLArray<mm_float2>& getStepSize() {
+        return *stepSize;
+    }
    /**
     * Apply constraints to the atom positions.
     *
@@ -82,6 +88,7 @@ private:
    OpenCLArray<mm_float4>* shakeParams;
    OpenCLArray<mm_float4>* random;
    OpenCLArray<mm_int4>* randomSeed;
+    OpenCLArray<mm_float2>* stepSize;
    int randomPos;
    int lastSeed;
    struct ShakeCluster;

--- a/platforms/opencl/src/OpenCLKernelFactory.cpp
+++ b/platforms/opencl/src/OpenCLKernelFactory.cpp
@@ -56,14 +56,14 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
        return new OpenCLIntegrateVerletStepKernel(name, platform, cl);
    if (name == IntegrateLangevinStepKernel::Name())
        return new OpenCLIntegrateLangevinStepKernel(name, platform, cl);
-//    if (name == IntegrateBrownianStepKernel::Name())
-//        return new OpenCLIntegrateBrownianStepKernel(name, platform, cl);
+    if (name == IntegrateBrownianStepKernel::Name())
+        return new OpenCLIntegrateBrownianStepKernel(name, platform, cl);
    if (name == IntegrateVariableVerletStepKernel::Name())
        return new OpenCLIntegrateVariableVerletStepKernel(name, platform, cl);
    if (name == IntegrateVariableLangevinStepKernel::Name())
        return new OpenCLIntegrateVariableLangevinStepKernel(name, platform, cl);
-//    if (name == ApplyAndersenThermostatKernel::Name())
-//        return new OpenCLApplyAndersenThermostatKernel(name, platform, cl);
+    if (name == ApplyAndersenThermostatKernel::Name())
+        return new OpenCLApplyAndersenThermostatKernel(name, platform, cl);
    if (name == CalcKineticEnergyKernel::Name())
        return new OpenCLCalcKineticEnergyKernel(name, platform, cl);
    if (name == RemoveCMMotionKernel::Name())

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -1171,8 +1171,6 @@ double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) {
 }

 OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
-    if (stepSize != NULL)
-        delete stepSize;
 }

 void OpenCLIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
@@ -1180,24 +1178,23 @@ void OpenCLIntegrateVerletStepKernel::initialize(const System& system, const Ver
    cl::Program program = cl.createProgram(cl.loadSourceFromFile("verlet.cl"));
    kernel1 = cl::Kernel(program, "integrateVerletPart1");
    kernel2 = cl::Kernel(program, "integrateVerletPart2");
-    stepSize = new OpenCLArray<mm_float2>(cl, 1, "stepSize");
    prevStepSize = -1.0;
 }

 void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
-    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilties();
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
    int numAtoms = cl.getNumAtoms();
    double dt = integrator.getStepSize();
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
        kernel1.setArg<cl_int>(0, numAtoms);
-        kernel1.setArg<cl::Buffer>(1, stepSize->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(4, cl.getForce().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
        kernel2.setArg<cl_int>(0, numAtoms);
-        kernel2.setArg<cl::Buffer>(1, stepSize->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(4, integration.getPosDelta().getDeviceBuffer());
@@ -1205,7 +1202,7 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
    if (dt != prevStepSize) {
        vector<mm_float2> stepSizeVec(1);
        stepSizeVec[0] = (mm_float2) {dt, dt};
-        stepSize->upload(stepSizeVec);
+        cl.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
        prevStepSize = dt;
    }

@@ -1238,8 +1235,11 @@ OpenCLIntegrateLangevinStepKernel::~OpenCLIntegrateLangevinStepKernel() {

 void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
    cl.initialize(system);
-    cl.getIntegrationUtilties().initRandomNumberGenerator(integrator.getRandomNumberSeed());
-    cl::Program program = cl.createProgram(cl.loadSourceFromFile("langevin.cl"));
+    cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
+    map<string, string> defines;
+    defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
+    defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
+    cl::Program program = cl.createProgram(cl.loadSourceFromFile("langevin.cl"), defines);
    kernel1 = cl::Kernel(program, "integrateLangevinPart1");
    kernel2 = cl::Kernel(program, "integrateLangevinPart2");
    kernel3 = cl::Kernel(program, "integrateLangevinPart3");
@@ -1252,32 +1252,28 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
 }

 void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
-    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilties();
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
    int numAtoms = cl.getNumAtoms();
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
-        kernel1.setArg<cl_int>(0, numAtoms);
-        kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(2, cl.getForce().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(3, integration.getPosDelta().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(4, params->getDeviceBuffer());
-        kernel1.setArg(5, params->getSize()*sizeof(cl_float), NULL);
-        kernel1.setArg<cl::Buffer>(6, xVector->getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(7, vVector->getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(8,integration.getRandom().getDeviceBuffer());
-        kernel2.setArg<cl_int>(0, numAtoms);
-        kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(3, params->getDeviceBuffer());
-        kernel2.setArg(4, params->getSize()*sizeof(cl_float), NULL);
-        kernel2.setArg<cl::Buffer>(5, xVector->getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(6, vVector->getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
-        kernel3.setArg<cl_int>(0, numAtoms);
-        kernel3.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
-        kernel3.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
-    }
-    int numThreads = cl.getNumThreadBlocks()*cl.ThreadBlockSize;
+        kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(3, params->getDeviceBuffer());
+        kernel1.setArg(4, params->getSize()*sizeof(cl_float), NULL);
+        kernel1.setArg<cl::Buffer>(5, xVector->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(6, vVector->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(2, params->getDeviceBuffer());
+        kernel2.setArg(3, params->getSize()*sizeof(cl_float), NULL);
+        kernel2.setArg<cl::Buffer>(4, xVector->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(5, vVector->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(6,integration.getRandom().getDeviceBuffer());
+        kernel3.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
+        kernel3.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer());
+    }
    double temperature = integrator.getTemperature();
    double friction = integrator.getFriction();
    double stepSize = integrator.getStepSize();
@@ -1346,7 +1342,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang

    // Call the first integration kernel.

-    kernel1.setArg<cl_uint>(9, integration.prepareRandomNumbers(2*numThreads));
+    kernel1.setArg<cl_uint>(8, integration.prepareRandomNumbers(2*cl.getPaddedNumAtoms()));
    cl.executeKernel(kernel1, numAtoms);

    // Apply constraints.
@@ -1355,7 +1351,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang

    // Call the second integration kernel.

-    kernel2.setArg<cl_uint>(8, integration.prepareRandomNumbers(2*numThreads));
+    kernel2.setArg<cl_uint>(7, integration.prepareRandomNumbers(2*cl.getPaddedNumAtoms()));
    cl.executeKernel(kernel2, numAtoms);

    // Reapply constraints.
@@ -1371,50 +1367,66 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
    cl.setTime(cl.getTime()+stepSize);
    cl.setStepCount(cl.getStepCount()+1);
 }
-//
-//OpenCLIntegrateBrownianStepKernel::~OpenCLIntegrateBrownianStepKernel() {
-//}
-//
-//void OpenCLIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
-//    initializeIntegration(system, data, integrator);
-//    _gpuContext* gpu = data.gpu;
-//    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
-//    gpuInitializeRandoms(gpu);
-//    prevStepSize = -1.0;
-//}
-//
-//void OpenCLIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
-//    _gpuContext* gpu = data.gpu;
-//    double temperature = integrator.getTemperature();
-//    double friction = integrator.getFriction();
-//    double stepSize = integrator.getStepSize();
-//    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
-//        // Initialize the GPU parameters.
-//
-//        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
-//        gpuSetBrownianIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature);
-//        gpuSetConstants(gpu);
-//        kGenerateRandoms(gpu);
-//        prevTemp = temperature;
-//        prevFriction = friction;
-//        prevStepSize = stepSize;
-//    }
-//    kBrownianUpdatePart1(gpu);
-//    kApplyFirstShake(gpu);
-//    kApplyFirstSettle(gpu);
-//    kApplyFirstCCMA(gpu);
-//    if (data.removeCM)
-//        if (data.stepCount%data.cmMotionFrequency == 0)
-//            gpu->bCalculateCM = true;
-//    kBrownianUpdatePart2(gpu);
-//    data.time += stepSize;
-//    data.stepCount++;
-//}
-//
+
+OpenCLIntegrateBrownianStepKernel::~OpenCLIntegrateBrownianStepKernel() {
+}
+
+void OpenCLIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
+    cl.initialize(system);
+    cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
+    map<string, string> defines;
+    defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
+    cl::Program program = cl.createProgram(cl.loadSourceFromFile("brownian.cl"), defines);
+    kernel1 = cl::Kernel(program, "integrateBrownianPart1");
+    kernel2 = cl::Kernel(program, "integrateBrownianPart2");
+    prevStepSize = -1.0;
+}
+
+void OpenCLIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
+    int numAtoms = cl.getNumAtoms();
+    if (!hasInitializedKernels) {
+        hasInitializedKernels = true;
+        kernel1.setArg<cl::Buffer>(2, cl.getForce().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(3, integration.getPosDelta().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(4,integration.getRandom().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(3, integration.getPosDelta().getDeviceBuffer());
+    }
+    double temperature = integrator.getTemperature();
+    double friction = integrator.getFriction();
+    double stepSize = integrator.getStepSize();
+    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
+        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
+        kernel1.setArg<cl_float>(0, (cl_float) (tau*stepSize));
+        kernel1.setArg<cl_float>(1, (cl_float) (sqrt(2.0f*BOLTZ*temperature*stepSize*tau)));
+        kernel2.setArg<cl_float>(0, (cl_float) (1.0/stepSize));
+        prevTemp = temperature;
+        prevFriction = friction;
+        prevStepSize = stepSize;
+    }
+
+    // Call the first integration kernel.
+
+    kernel1.setArg<cl_uint>(5, integration.prepareRandomNumbers(cl.getPaddedNumAtoms()));
+    cl.executeKernel(kernel1, numAtoms);
+
+    // Apply constraints.
+
+    integration.applyConstraints(integrator.getConstraintTolerance());
+
+    // Call the second integration kernel.
+
+    cl.executeKernel(kernel2, numAtoms);
+
+    // Update the time and step count.
+
+    cl.setTime(cl.getTime()+stepSize);
+    cl.setStepCount(cl.getStepCount()+1);
+}

 OpenCLIntegrateVariableVerletStepKernel::~OpenCLIntegrateVariableVerletStepKernel() {
-    if (stepSize != NULL)
-        delete stepSize;
 }

 void OpenCLIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
@@ -1423,30 +1435,27 @@ void OpenCLIntegrateVariableVerletStepKernel::initialize(const System& system, c
    kernel1 = cl::Kernel(program, "integrateVerletPart1");
    kernel2 = cl::Kernel(program, "integrateVerletPart2");
    selectSizeKernel = cl::Kernel(program, "selectVerletStepSize");
-    stepSize = new OpenCLArray<mm_float2>(cl, 1, "stepSize", true);
-    stepSize->set(0, (mm_float2) {0.0f, 0.0f});
-    stepSize->upload();
    blockSize = std::min(std::min(256, system.getNumParticles()), (int) cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
 }

 void OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
-    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilties();
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
    int numAtoms = cl.getNumAtoms();
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
        kernel1.setArg<cl_int>(0, numAtoms);
-        kernel1.setArg<cl::Buffer>(1, stepSize->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(4, cl.getForce().getDeviceBuffer());
        kernel1.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
        kernel2.setArg<cl_int>(0, numAtoms);
-        kernel2.setArg<cl::Buffer>(1, stepSize->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(4, integration.getPosDelta().getDeviceBuffer());
        selectSizeKernel.setArg<cl_int>(0, numAtoms);
-        selectSizeKernel.setArg<cl::Buffer>(3, stepSize->getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(3, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
        selectSizeKernel.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
        selectSizeKernel.setArg<cl::Buffer>(5, cl.getForce().getDeviceBuffer());
        selectSizeKernel.setArg(6, blockSize*sizeof(cl_float), NULL);
@@ -1473,8 +1482,8 @@ void OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons

    // Update the time and step count.

-    stepSize->download();
-    double dt = stepSize->get(0).y;
+    cl.getIntegrationUtilities().getStepSize().download();
+    double dt = cl.getIntegrationUtilities().getStepSize()[0].y;
    double time = cl.getTime()+dt;
    if (dt == maxStepSize)
        time = maxTime; // Avoid round-off error
@@ -1489,14 +1498,15 @@ OpenCLIntegrateVariableLangevinStepKernel::~OpenCLIntegrateVariableLangevinStepK
        delete xVector;
    if (vVector != NULL)
        delete vVector;
-    if (stepSize != NULL)
-        delete stepSize;
 }

 void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
    cl.initialize(system);
-    cl.getIntegrationUtilties().initRandomNumberGenerator(integrator.getRandomNumberSeed());
-    cl::Program program = cl.createProgram(cl.loadSourceFromFile("langevin.cl"));
+    cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
+    map<string, string> defines;
+    defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
+    defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
+    cl::Program program = cl.createProgram(cl.loadSourceFromFile("langevin.cl"), defines);
    kernel1 = cl::Kernel(program, "integrateLangevinPart1");
    kernel2 = cl::Kernel(program, "integrateLangevinPart2");
    kernel3 = cl::Kernel(program, "integrateLangevinPart3");
@@ -1506,61 +1516,53 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
    vVector = new OpenCLArray<mm_float4>(cl, cl.getPaddedNumAtoms(), "vVector");
    vector<mm_float4> initialXVector(xVector->getSize(), (mm_float4) {0.0f, 0.0f, 0.0f, 0.0f});
    xVector->upload(initialXVector);
-    stepSize = new OpenCLArray<mm_float2>(cl, 1, "stepSize", true);
-    stepSize->set(0, (mm_float2) {0.0f, 0.0f});
-    stepSize->upload();
    blockSize = std::min(256, system.getNumParticles());
    blockSize = std::max(blockSize, params->getSize());
    blockSize = std::min(blockSize, (int) cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
 }

 void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
-    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilties();
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
    int numAtoms = cl.getNumAtoms();
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
-        kernel1.setArg<cl_int>(0, numAtoms);
-        kernel1.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(2, cl.getForce().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(3, integration.getPosDelta().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(4, params->getDeviceBuffer());
-        kernel1.setArg(5, params->getSize()*sizeof(cl_float), NULL);
-        kernel1.setArg<cl::Buffer>(6, xVector->getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(7, vVector->getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(8,integration.getRandom().getDeviceBuffer());
-        kernel2.setArg<cl_int>(0, numAtoms);
-        kernel2.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(3, params->getDeviceBuffer());
-        kernel2.setArg(4, params->getSize()*sizeof(cl_float), NULL);
-        kernel2.setArg<cl::Buffer>(5, xVector->getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(6, vVector->getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
-        kernel3.setArg<cl_int>(0, numAtoms);
-        kernel3.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
-        kernel3.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
-        selectSizeKernel.setArg<cl_int>(0, numAtoms);
-        selectSizeKernel.setArg<cl::Buffer>(5, stepSize->getDeviceBuffer());
-        selectSizeKernel.setArg<cl::Buffer>(6, cl.getVelm().getDeviceBuffer());
-        selectSizeKernel.setArg<cl::Buffer>(7, cl.getForce().getDeviceBuffer());
-        selectSizeKernel.setArg<cl::Buffer>(8, params->getDeviceBuffer());
-        selectSizeKernel.setArg(9, params->getSize()*sizeof(cl_float), NULL);
-        selectSizeKernel.setArg(10, blockSize*sizeof(cl_float), NULL);
+        kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(3, params->getDeviceBuffer());
+        kernel1.setArg(4, params->getSize()*sizeof(cl_float), NULL);
+        kernel1.setArg<cl::Buffer>(5, xVector->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(6, vVector->getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(2, params->getDeviceBuffer());
+        kernel2.setArg(3, params->getSize()*sizeof(cl_float), NULL);
+        kernel2.setArg<cl::Buffer>(4, xVector->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(5, vVector->getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(6,integration.getRandom().getDeviceBuffer());
+        kernel3.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
+        kernel3.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(4, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(6, cl.getForce().getDeviceBuffer());
+        selectSizeKernel.setArg<cl::Buffer>(7, params->getDeviceBuffer());
+        selectSizeKernel.setArg(8, params->getSize()*sizeof(cl_float), NULL);
+        selectSizeKernel.setArg(9, blockSize*sizeof(cl_float), NULL);
    }

    // Select the step size to use.

    float maxStepSize = (float)(maxTime-cl.getTime());
-    selectSizeKernel.setArg<cl_float>(1, maxStepSize);
-    selectSizeKernel.setArg<cl_float>(2, (cl_float) integrator.getErrorTolerance());
-    selectSizeKernel.setArg<cl_float>(3, (cl_float) (integrator.getFriction() == 0.0 ? 0.0 : 1.0/integrator.getFriction()));
-    selectSizeKernel.setArg<cl_float>(4, (cl_float) (BOLTZ*integrator.getTemperature()));
+    selectSizeKernel.setArg<cl_float>(0, maxStepSize);
+    selectSizeKernel.setArg<cl_float>(1, (cl_float) integrator.getErrorTolerance());
+    selectSizeKernel.setArg<cl_float>(2, (cl_float) (integrator.getFriction() == 0.0 ? 0.0 : 1.0/integrator.getFriction()));
+    selectSizeKernel.setArg<cl_float>(3, (cl_float) (BOLTZ*integrator.getTemperature()));
    cl.executeKernel(selectSizeKernel, blockSize, blockSize);

    // Call the first integration kernel.

-    int numThreads = cl.getNumThreadBlocks()*cl.ThreadBlockSize;
-    kernel1.setArg<cl_uint>(9, integration.prepareRandomNumbers(2*numThreads));
+    kernel1.setArg<cl_uint>(8, integration.prepareRandomNumbers(2*cl.getPaddedNumAtoms()));
    cl.executeKernel(kernel1, numAtoms);

    // Apply constraints.
@@ -1569,7 +1571,7 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co

    // Call the second integration kernel.

-    kernel2.setArg<cl_uint>(8, integration.prepareRandomNumbers(2*numThreads));
+    kernel2.setArg<cl_uint>(7, integration.prepareRandomNumbers(2*cl.getPaddedNumAtoms()));
    cl.executeKernel(kernel2, numAtoms);

    // Reapply constraints.
@@ -1582,8 +1584,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co

    // Update the time and step count.

-    stepSize->download();
-    double dt = stepSize->get(0).y;
+    cl.getIntegrationUtilities().getStepSize().download();
+    double dt = cl.getIntegrationUtilities().getStepSize()[0].y;
    double time = cl.getTime()+dt;
    if (dt == maxStepSize)
        time = maxTime; // Avoid round-off error
@@ -1591,34 +1593,32 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
    cl.setStepCount(cl.getStepCount()+1);
 }

-//OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
-//}
-//
-//void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
-//    _gpuContext* gpu = data.gpu;
-//    gpu->seed = (unsigned long) thermostat.getRandomNumberSeed();
-//    gpuInitializeRandoms(gpu);
-//    prevStepSize = -1.0;
-//}
-//
-//void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
-//    _gpuContext* gpu = data.gpu;
-//    double temperature = context.getParameter(AndersenThermostat::Temperature());
-//    double frequency = context.getParameter(AndersenThermostat::CollisionFrequency());
-//    double stepSize = context.getIntegrator().getStepSize();
-//    if (temperature != prevTemp || frequency != prevFrequency || stepSize != prevStepSize) {
-//        // Initialize the GPU parameters.
-//
-//        gpuSetAndersenThermostatParameters(gpu, (float) temperature, frequency);
-//        gpuSetConstants(gpu);
-//        kGenerateRandoms(gpu);
-//        prevTemp = temperature;
-//        prevFrequency = frequency;
-//        prevStepSize = stepSize;
-//    }
-//    kCalculateAndersenThermostat(gpu);
-//}
-//
+OpenCLApplyAndersenThermostatKernel::~OpenCLApplyAndersenThermostatKernel() {
+}
+
+void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
+    randomSeed = thermostat.getRandomNumberSeed();
+    map<string, string> defines;
+    defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
+    defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
+    cl::Program program = cl.createProgram(cl.loadSourceFromFile("andersenThermostat.cl"), defines);
+    kernel = cl::Kernel(program, "applyAndersenThermostat");
+}
+
+void OpenCLApplyAndersenThermostatKernel::execute(ContextImpl& context) {
+    if (!hasInitializedKernels) {
+        hasInitializedKernels = true;
+        cl.getIntegrationUtilities().initRandomNumberGenerator(randomSeed);
+        kernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
+        kernel.setArg<cl::Buffer>(3, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
+        kernel.setArg<cl::Buffer>(4, cl.getIntegrationUtilities().getRandom().getDeviceBuffer());
+    }
+    kernel.setArg<cl_float>(0, (cl_float) context.getParameter(AndersenThermostat::CollisionFrequency()));
+    kernel.setArg<cl_float>(1, (cl_float) (BOLTZ*context.getParameter(AndersenThermostat::Temperature())));
+    kernel.setArg<cl_uint>(5, cl.getIntegrationUtilities().prepareRandomNumbers(cl.getPaddedNumAtoms()));
+    cl.executeKernel(kernel, cl.getNumAtoms());
+}
+
 void OpenCLCalcKineticEnergyKernel::initialize(const System& system) {
    int numParticles = system.getNumParticles();
    masses.resize(numParticles);

--- a/platforms/opencl/src/OpenCLKernels.h
+++ b/platforms/opencl/src/OpenCLKernels.h
@@ -436,7 +436,7 @@ private:
 class OpenCLIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
 public:
    OpenCLIntegrateVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVerletStepKernel(name, platform), cl(cl),
-            hasInitializedKernels(false), stepSize(NULL) {
+            hasInitializedKernels(false) {
    }
    ~OpenCLIntegrateVerletStepKernel();
    /**
@@ -457,7 +457,6 @@ private:
    OpenCLContext& cl;
    double prevStepSize;
    bool hasInitializedKernels;
-    OpenCLArray<mm_float2>* stepSize;
    cl::Kernel kernel1, kernel2;
 };

@@ -494,32 +493,34 @@ private:
    cl::Kernel kernel1, kernel2, kernel3;
 };

-///**
-// * This kernel is invoked by BrownianIntegrator to take one time step.
-// */
-//class OpenCLIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
-//public:
-//    OpenCLIntegrateBrownianStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateBrownianStepKernel(name, platform), cl(cl) {
-//    }
-//    ~OpenCLIntegrateBrownianStepKernel();
-//    /**
-//     * Initialize the kernel.
-//     *
-//     * @param system     the System this kernel will be applied to
-//     * @param integrator the BrownianIntegrator this kernel will be used for
-//     */
-//    void initialize(const System& system, const BrownianIntegrator& integrator);
-//    /**
-//     * Execute the kernel.
-//     *
-//     * @param context    the context in which to execute this kernel
-//     * @param integrator the BrownianIntegrator this kernel is being used for
-//     */
-//    void execute(ContextImpl& context, const BrownianIntegrator& integrator);
-//private:
-//    OpenCLContext& cl;
-//    double prevTemp, prevFriction, prevStepSize;
-//};
+/**
+ * This kernel is invoked by BrownianIntegrator to take one time step.
+ */
+class OpenCLIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
+public:
+    OpenCLIntegrateBrownianStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateBrownianStepKernel(name, platform), cl(cl) {
+    }
+    ~OpenCLIntegrateBrownianStepKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param integrator the BrownianIntegrator this kernel will be used for
+     */
+    void initialize(const System& system, const BrownianIntegrator& integrator);
+    /**
+     * Execute the kernel.
+     *
+     * @param context    the context in which to execute this kernel
+     * @param integrator the BrownianIntegrator this kernel is being used for
+     */
+    void execute(ContextImpl& context, const BrownianIntegrator& integrator);
+private:
+    OpenCLContext& cl;
+    double prevTemp, prevFriction, prevStepSize;
+    bool hasInitializedKernels;
+    cl::Kernel kernel1, kernel2;
+};

 /**
 * This kernel is invoked by VariableVerletIntegrator to take one time step.
@@ -527,7 +528,7 @@ private:
 class OpenCLIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStepKernel {
 public:
    OpenCLIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableVerletStepKernel(name, platform), cl(cl),
-            hasInitializedKernels(false), stepSize(NULL) {
+            hasInitializedKernels(false) {
    }
    ~OpenCLIntegrateVariableVerletStepKernel();
    /**
@@ -549,7 +550,6 @@ private:
    OpenCLContext& cl;
    bool hasInitializedKernels;
    int blockSize;
-    OpenCLArray<mm_float2>* stepSize;
    cl::Kernel kernel1, kernel2, selectSizeKernel;
 };

@@ -559,7 +559,7 @@ private:
 class OpenCLIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
 public:
    OpenCLIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateVariableLangevinStepKernel(name, platform), cl(cl),
-            hasInitializedKernels(false), stepSize(NULL) {
+            hasInitializedKernels(false) {
    }
    ~OpenCLIntegrateVariableLangevinStepKernel();
    /**
@@ -584,36 +584,39 @@ private:
    OpenCLArray<cl_float>* params;
    OpenCLArray<mm_float4>* xVector;
    OpenCLArray<mm_float4>* vVector;
-    OpenCLArray<mm_float2>* stepSize;
    cl::Kernel kernel1, kernel2, kernel3, selectSizeKernel;
    double prevTemp, prevFriction, prevErrorTol;
 };
-//
-///**
-// * This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
-// */
-//class OpenCLApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
-//public:
-//    OpenCLApplyAndersenThermostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl) {
-//    }
-//    ~OpenCLApplyAndersenThermostatKernel();
-//    /**
-//     * Initialize the kernel.
-//     *
-//     * @param system     the System this kernel will be applied to
-//     * @param thermostat the AndersenThermostat this kernel will be used for
-//     */
-//    void initialize(const System& system, const AndersenThermostat& thermostat);
-//    /**
-//     * Execute the kernel.
-//     *
-//     * @param context    the context in which to execute this kernel
-//     */
-//    void execute(ContextImpl& context);
-//private:
-//    OpenCLContext& cl;
-//    double prevTemp, prevFrequency, prevStepSize;
-//};
+
+/**
+ * This kernel is invoked by AndersenThermostat at the start of each time step to adjust the particle velocities.
+ */
+class OpenCLApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
+public:
+    OpenCLApplyAndersenThermostatKernel(std::string name, const Platform& platform, OpenCLContext& cl) : ApplyAndersenThermostatKernel(name, platform), cl(cl),
+            hasInitializedKernels(false) {
+    }
+    ~OpenCLApplyAndersenThermostatKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param thermostat the AndersenThermostat this kernel will be used for
+     */
+    void initialize(const System& system, const AndersenThermostat& thermostat);
+    /**
+     * Execute the kernel.
+     *
+     * @param context    the context in which to execute this kernel
+     */
+    void execute(ContextImpl& context);
+private:
+    OpenCLContext& cl;
+    bool hasInitializedKernels;
+    int randomSeed;
+    cl::Kernel kernel;
+    double prevTemp, prevFriction, prevStepSize;
+};

 /**
 * This kernel is invoked to calculate the kinetic energy of the system.

--- a/platforms/opencl/src/OpenCLPlatform.cpp
+++ b/platforms/opencl/src/OpenCLPlatform.cpp
@@ -56,10 +56,10 @@ OpenCLPlatform::OpenCLPlatform() {
    registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
    registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
-//    registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
+    registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableVerletStepKernel::Name(), factory);
    registerKernelFactory(IntegrateVariableLangevinStepKernel::Name(), factory);
-//    registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
+    registerKernelFactory(ApplyAndersenThermostatKernel::Name(), factory);
    registerKernelFactory(CalcKineticEnergyKernel::Name(), factory);
    registerKernelFactory(RemoveCMMotionKernel::Name(), factory);
    platformProperties.push_back(OpenCLDeviceIndex());

--- a/platforms/opencl/src/kernels/andersenThermostat.cl
+++ b/platforms/opencl/src/kernels/andersenThermostat.cl
+/**
+ * Apply the Andersen thermostat to adjust particle velocities.
+ */
+
+__kernel void applyAndersenThermostat(float collisionFrequency, float kT, __global float4* velm, __global float2* stepSize, __global float4* random, unsigned int randomIndex) {
+    randomIndex += get_global_id(0);
+    float collisionProbability = 1.0f-exp(-collisionFrequency*stepSize[0].y);
+    for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
+        float4 velocity = velm[index];
+        float4 rand = random[randomIndex];
+        float scale = (rand.w < collisionProbability ? 0.0 : 1.0);
+        float add = (1.0-scale)*sqrt(kT*velocity.w);
+        velocity.xyz = scale*velocity.xyz + add*rand.xyz;
+        velm[index] = velocity;
+        randomIndex += get_global_size(0);
+    }
+}
--- a/platforms/opencl/src/kernels/brownian.cl
+++ b/platforms/opencl/src/kernels/brownian.cl
+/**
+ * Perform the first step of Brownian integration.
+ */
+
+__kernel void integrateBrownianPart1(float tauDeltaT, float noiseAmplitude, __global float4* force,
+        __global float4* posDelta, __global float4* random, unsigned int randomIndex) {
+    randomIndex += get_global_id(0);
+    for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
+        posDelta[index] = (float4) (tauDeltaT*force[index].xyz + noiseAmplitude*random[randomIndex].xyz, 0.0f);
+        randomIndex += get_global_size(0);
+    }
+}
+
+/**
+ * Perform the second step of Brownian integration.
+ */
+
+__kernel void integrateBrownianPart2(float oneOverDeltaT, __global float4* posq, __global float4* velm, __global float4* posDelta) {
+    for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
+        float4 delta = posDelta[index];
+        velm[index].xyz = oneOverDeltaT*delta.xyz;
+        posq[index].xyz = posq[index].xyz + delta.xyz;
+    }
+}
--- a/platforms/opencl/src/kernels/langevin.cl
+++ b/platforms/opencl/src/kernels/langevin.cl
@@ -4,7 +4,7 @@ enum {EM, EM_V, DOverTauC, TauOneMinusEM_V, TauDOverEMMinusOne, V, X, Yv, Yx, Fi
 * Perform the first step of Langevin integration.
 */

-__kernel void integrateLangevinPart1(int numAtoms, __global float4* velm, __global float4* force, __global float4* posDelta,
+__kernel void integrateLangevinPart1(__global float4* velm, __global float4* force, __global float4* posDelta,
        __global float* paramBuffer, __local float* params, __global float4* xVector, __global float4* vVector,
        __global float4* random, unsigned int randomIndex) {

@@ -15,12 +15,11 @@ __kernel void integrateLangevinPart1(int numAtoms, __global float4* velm, __glob
        params[index] = paramBuffer[index];
    barrier(CLK_LOCAL_MEM_FENCE);
    randomIndex += index;
-    while (index < numAtoms) {
+    while (index < NUM_ATOMS) {
        float4 velocity = velm[index];
        float sqrtInvMass = sqrt(velocity.w);
        float4 vmh = (float4) (xVector[index].xyz*params[DOverTauC] + sqrtInvMass*params[Yv]*random[randomIndex].xyz, 0.0f);
-        randomIndex += get_global_size(0);
-        float4 vVec = (float4) (sqrtInvMass*params[V]*random[randomIndex].xyz, 0.0f);
+        float4 vVec = (float4) (sqrtInvMass*params[V]*random[randomIndex+PADDED_NUM_ATOMS].xyz, 0.0f);
        randomIndex += get_global_size(0);
        vVector[index] = vVec;
        velocity.xyz = velocity.xyz*params[EM_V] + velocity.w*force[index].xyz*params[TauOneMinusEM_V] + vVec.xyz - params[EM]*vmh.xyz;
@@ -34,7 +33,7 @@ __kernel void integrateLangevinPart1(int numAtoms, __global float4* velm, __glob
 * Perform the second step of Langevin integration.
 */

-__kernel void integrateLangevinPart2(int numAtoms, __global float4* velm, __global float4* posDelta, __global float* paramBuffer,
+__kernel void integrateLangevinPart2(__global float4* velm, __global float4* posDelta, __global float* paramBuffer,
        __local float* params, __global float4* xVector, __global float4* vVector, __global float4* random, unsigned int randomIndex) {

    // Load the parameters into local memory for faster access.
@@ -44,14 +43,13 @@ __kernel void integrateLangevinPart2(int numAtoms, __global float4* velm, __glob
        params[index] = paramBuffer[index];
    barrier(CLK_LOCAL_MEM_FENCE);
    randomIndex += index;
-    while (index < numAtoms) {
+    while (index < NUM_ATOMS) {
        float4 delta = posDelta[index];
        float4 velocity = velm[index];
        float sqrtInvMass = sqrt(velocity.w);
        velocity.xyz = delta.xyz*params[OneOverFix1];
        float4 xmh = (float4) (vVector[index].xyz*params[TauDOverEMMinusOne] + sqrtInvMass*params[Yx]*random[randomIndex].xyz, 0.0f);
-        randomIndex += get_global_size(0);
-        float4 xVec = (float4) (sqrtInvMass*params[X]*random[randomIndex].xyz, 0.0f);
+        float4 xVec = (float4) (sqrtInvMass*params[X]*random[randomIndex+PADDED_NUM_ATOMS].xyz, 0.0f);
        randomIndex += get_global_size(0);
        delta.xyz += xVec.xyz - xmh.xyz;
        posDelta[index] = delta;
@@ -65,9 +63,9 @@ __kernel void integrateLangevinPart2(int numAtoms, __global float4* velm, __glob
 * Perform the third step of Langevin integration.
 */

-__kernel void integrateLangevinPart3(int numAtoms, __global float4* posq, __global float4* posDelta) {
+__kernel void integrateLangevinPart3(__global float4* posq, __global float4* posDelta) {
    int index = get_global_id(0);
-    while (index < numAtoms) {
+    while (index < NUM_ATOMS) {
        float4 pos = posq[index];
        float4 delta = posDelta[index];
        pos.xyz += delta.xyz;
@@ -80,13 +78,13 @@ __kernel void integrateLangevinPart3(int numAtoms, __global float4* posq, __glob
 * Select the step size to use for the next step.
 */

-__kernel void selectLangevinStepSize(int numAtoms, float maxStepSize, float errorTol, float tau, float kT, __global float2* dt,
+__kernel void selectLangevinStepSize(float maxStepSize, float errorTol, float tau, float kT, __global float2* dt,
        __global float4* velm, __global float4* force, __global float* paramBuffer, __local float* params, __local float* error) {
    // Calculate the error.

    float err = 0.0f;
    unsigned int index = get_local_id(0);
-    while (index < numAtoms) {
+    while (index < NUM_ATOMS) {
        float4 f = force[index];
        float invMass = velm[index].w;
        err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass;
@@ -105,7 +103,7 @@ __kernel void selectLangevinStepSize(int numAtoms, float maxStepSize, float erro
    if (get_global_id(0) == 0) {
        // Select the new step size.

-        float totalError = sqrt(error[0]/(numAtoms*3));
+        float totalError = sqrt(error[0]/(NUM_ATOMS*3));
        float newStepSize = sqrt(errorTol/totalError);
        float oldStepSize = dt[0].y;
        if (oldStepSize > 0.0f)

--- a/platforms/opencl/tests/TestOpenCLAndersenThermostat.cpp
+++ b/platforms/opencl/tests/TestOpenCLAndersenThermostat.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+/**
+ * This tests the OpenCL implementation of AndersenThermostat.
+ */
+
+#include "../../../tests/AssertionUtilities.h"
+#include "openmm/AndersenThermostat.h"
+#include "openmm/Context.h"
+#include "OpenCLPlatform.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/System.h"
+#include "openmm/VerletIntegrator.h"
+#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+#include "../src/sfmt/SFMT.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+void testTemperature() {
+    const int numParticles = 8;
+    const double temp = 100.0;
+    const double collisionFreq = 10.0;
+    OpenCLPlatform platform;
+    System system;
+    VerletIntegrator integrator(0.01);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(2.0);
+        forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
+    }
+    system.addForce(forceField);
+    AndersenThermostat* thermstat = new AndersenThermostat(temp, collisionFreq);
+    system.addForce(thermstat);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(numParticles);
+    for (int i = 0; i < numParticles; ++i)
+        positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
+    context.setPositions(positions);
+
+    // Let it equilibrate.
+
+    integrator.step(10000);
+
+    // Now run it for a while and see if the temperature is correct.
+
+    double ke = 0.0;
+    for (int i = 0; i < 1000; ++i) {
+        State state = context.getState(State::Energy);
+        ke += state.getKineticEnergy();
+        integrator.step(1);
+    }
+    ke /= 1000;
+    double expected = 0.5*numParticles*3*BOLTZ*temp;
+    ASSERT_EQUAL_TOL(expected, ke, 3*expected/std::sqrt(1000.0));
+}
+
+void testRandomSeed() {
+    const int numParticles = 8;
+    const double temp = 100.0;
+    const double collisionFreq = 10.0;
+    OpenCLPlatform platform;
+    System system;
+    VerletIntegrator integrator(0.01);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(2.0);
+        forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
+    }
+    system.addForce(forceField);
+    AndersenThermostat* thermostat = new AndersenThermostat(temp, collisionFreq);
+    system.addForce(thermostat);
+    vector<Vec3> positions(numParticles);
+    vector<Vec3> velocities(numParticles);
+    for (int i = 0; i < numParticles; ++i) {
+        positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
+        velocities[i] = Vec3(0, 0, 0);
+    }
+
+    // Try twice with the same random seed.
+
+    thermostat->setRandomNumberSeed(5);
+    Context context(system, integrator, platform);
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state1 = context.getState(State::Positions);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state2 = context.getState(State::Positions);
+
+    // Try twice with a different random seed.
+
+    thermostat->setRandomNumberSeed(10);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state3 = context.getState(State::Positions);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state4 = context.getState(State::Positions);
+
+    // Compare the results.
+
+    for (int i = 0; i < numParticles; i++) {
+        for (int j = 0; j < 3; j++) {
+            ASSERT(state1.getPositions()[i][j] == state2.getPositions()[i][j]);
+            ASSERT(state3.getPositions()[i][j] == state4.getPositions()[i][j]);
+            ASSERT(state1.getPositions()[i][j] != state3.getPositions()[i][j]);
+        }
+    }
+}
+
+int main() {
+    try {
+        testTemperature();
+        testRandomSeed();
+    }
+    catch(const exception& e) {
+        cout << "exception: " << e.what() << endl;
+        return 1;
+    }
+    cout << "Done" << endl;
+    return 0;
+}
+
--- a/platforms/opencl/tests/TestOpenCLBrownianIntegrator.cpp
+++ b/platforms/opencl/tests/TestOpenCLBrownianIntegrator.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/System.h"
+
+
+/**
+ * This tests the OpenCL implementation of BrownianIntegrator.
+ */
+
+#include "../../../tests/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "OpenCLPlatform.h"
+#include "openmm/HarmonicBondForce.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/System.h"
+#include "openmm/BrownianIntegrator.h"
+#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+#include "../src/sfmt/SFMT.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+const double TOL = 1e-5;
+
+void testSingleBond() {
+    OpenCLPlatform platform;
+    System system;
+    system.addParticle(2.0);
+    system.addParticle(2.0);
+    double dt = 0.01;
+    BrownianIntegrator integrator(0, 0.1, dt);
+    HarmonicBondForce* forceField = new HarmonicBondForce();
+    forceField->addBond(0, 1, 1.5, 1);
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(2);
+    positions[0] = Vec3(-1, 0, 0);
+    positions[1] = Vec3(1, 0, 0);
+    context.setPositions(positions);
+
+    // This is simply an overdamped harmonic oscillator, so compare it to the analytical solution.
+
+    double rate = 2*1.0/0.1;
+    for (int i = 0; i < 1000; ++i) {
+        State state = context.getState(State::Positions | State::Velocities);
+        double time = state.getTime();
+        double expectedDist = 1.5+0.5*std::exp(-rate*time);
+        ASSERT_EQUAL_VEC(Vec3(-0.5*expectedDist, 0, 0), state.getPositions()[0], 0.02);
+        ASSERT_EQUAL_VEC(Vec3(0.5*expectedDist, 0, 0), state.getPositions()[1], 0.02);
+        if (i > 0) {
+            double expectedSpeed = -0.5*rate*std::exp(-rate*(time-0.5*dt));
+            ASSERT_EQUAL_VEC(Vec3(-0.5*expectedSpeed, 0, 0), state.getVelocities()[0], 0.11);
+            ASSERT_EQUAL_VEC(Vec3(0.5*expectedSpeed, 0, 0), state.getVelocities()[1], 0.11);
+        }
+        integrator.step(1);
+    }
+}
+
+void testTemperature() {
+    const int numParticles = 8;
+    const int numBonds = numParticles-1;
+    const double temp = 10.0;
+    OpenCLPlatform platform;
+    System system;
+    BrownianIntegrator integrator(temp, 2.0, 0.01);
+    HarmonicBondForce* forceField = new HarmonicBondForce();
+    for (int i = 0; i < numParticles; ++i)
+        system.addParticle(2.0);
+    for (int i = 0; i < numBonds; ++i)
+        forceField->addBond(i, i+1, 1.0, 5.0);
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(numParticles);
+    for (int i = 0; i < numParticles; ++i)
+        positions[i] = Vec3(i, 0, 0);
+    context.setPositions(positions);
+
+    // Let it equilibrate.
+
+    integrator.step(10000);
+
+    // Now run it for a while and see if the temperature is correct.
+
+    double pe = 0.0;
+    const int steps = 50000;
+    for (int i = 0; i < steps; ++i) {
+        State state = context.getState(State::Energy);
+        pe += state.getPotentialEnergy();
+        integrator.step(1);
+    }
+    pe /= steps;
+    double expected = 0.5*numBonds*BOLTZ*temp;
+    ASSERT_EQUAL_TOL(expected, pe, 0.1*expected);
+}
+
+void testConstraints() {
+    const int numParticles = 8;
+    const int numConstraints = 5;
+    const double temp = 20.0;
+    OpenCLPlatform platform;
+    System system;
+    BrownianIntegrator integrator(temp, 2.0, 0.001);
+    integrator.setConstraintTolerance(1e-5);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(10.0);
+        forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
+    }
+    system.addConstraint(0, 1, 1.0);
+    system.addConstraint(1, 2, 1.0);
+    system.addConstraint(2, 3, 1.0);
+    system.addConstraint(4, 5, 1.0);
+    system.addConstraint(6, 7, 1.0);
+    system.addForce(forceField);
+    Context context(system, integrator, platform);
+    vector<Vec3> positions(numParticles);
+    vector<Vec3> velocities(numParticles);
+    init_gen_rand(0);
+    for (int i = 0; i < numParticles; ++i) {
+        positions[i] = Vec3(i, 0, 0);
+        velocities[i] = Vec3(genrand_real2()-0.5, genrand_real2()-0.5, genrand_real2()-0.5);
+    }
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+
+    // Simulate it and see whether the constraints remain satisfied.
+
+    for (int i = 0; i < 1000; ++i) {
+        State state = context.getState(State::Positions);
+        for (int j = 0; j < numConstraints; ++j) {
+            int particle1, particle2;
+            double distance;
+            system.getConstraintParameters(j, particle1, particle2, distance);
+            Vec3 p1 = state.getPositions()[particle1];
+            Vec3 p2 = state.getPositions()[particle2];
+            double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
+            ASSERT_EQUAL_TOL(distance, dist, 1e-4);
+        }
+        integrator.step(1);
+    }
+}
+
+void testRandomSeed() {
+    const int numParticles = 8;
+    const double temp = 100.0;
+    const double collisionFreq = 10.0;
+    OpenCLPlatform platform;
+    System system;
+    BrownianIntegrator integrator(temp, 2.0, 0.001);
+    NonbondedForce* forceField = new NonbondedForce();
+    for (int i = 0; i < numParticles; ++i) {
+        system.addParticle(2.0);
+        forceField->addParticle((i%2 == 0 ? 1.0 : -1.0), 1.0, 5.0);
+    }
+    system.addForce(forceField);
+    vector<Vec3> positions(numParticles);
+    vector<Vec3> velocities(numParticles);
+    for (int i = 0; i < numParticles; ++i) {
+        positions[i] = Vec3((i%2 == 0 ? 2 : -2), (i%4 < 2 ? 2 : -2), (i < 4 ? 2 : -2));
+        velocities[i] = Vec3(0, 0, 0);
+    }
+
+    // Try twice with the same random seed.
+
+    integrator.setRandomNumberSeed(5);
+    Context context(system, integrator, platform);
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state1 = context.getState(State::Positions);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state2 = context.getState(State::Positions);
+
+    // Try twice with a different random seed.
+
+    integrator.setRandomNumberSeed(10);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state3 = context.getState(State::Positions);
+    context.reinitialize();
+    context.setPositions(positions);
+    context.setVelocities(velocities);
+    integrator.step(10);
+    State state4 = context.getState(State::Positions);
+
+    // Compare the results.
+
+    for (int i = 0; i < numParticles; i++) {
+        for (int j = 0; j < 3; j++) {
+            ASSERT(state1.getPositions()[i][j] == state2.getPositions()[i][j]);
+            ASSERT(state3.getPositions()[i][j] == state4.getPositions()[i][j]);
+            ASSERT(state1.getPositions()[i][j] != state3.getPositions()[i][j]);
+        }
+    }
+}
+
+int main() {
+    try {
+        testSingleBond();
+        testTemperature();
+//        testConstraints();
+        testRandomSeed();
+    }
+    catch(const exception& e) {
+        cout << "exception: " << e.what() << endl;
+        return 1;
+    }
+    cout << "Done" << endl;
+    return 0;
+}
+
--- a/platforms/opencl/tests/TestOpenCLRandom.cpp
+++ b/platforms/opencl/tests/TestOpenCLRandom.cpp
@@ -50,9 +50,9 @@ void testGaussian() {
        system.addParticle(1.0);
    OpenCLContext context(numAtoms, -1);
    context.initialize(system);
-    context.getIntegrationUtilties().initRandomNumberGenerator(0);
-    OpenCLArray<mm_float4>& random = context.getIntegrationUtilties().getRandom();
-    context.getIntegrationUtilties().prepareRandomNumbers(random.getSize());
+    context.getIntegrationUtilities().initRandomNumberGenerator(0);
+    OpenCLArray<mm_float4>& random = context.getIntegrationUtilities().getRandom();
+    context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
    const int numValues = random.getSize()*4;
    vector<mm_float4> values(numValues);
    random.download(values);