CUDA and OpenCL implementation of hard wall constraints for DrudeLangevinIntegrator

469cdcaf · Peter Eastman · cecc774a · 469cdcaf · 469cdcaf · 469cdcaf
Commit 469cdcaf authored Oct 05, 2015 by Peter Eastman
8 changed files
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -279,6 +279,7 @@ void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, cons
    CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaDrudeKernelSources::drudeLangevin, defines, "");
    kernel1 = cu.getKernel(module, "integrateDrudeLangevinPart1");
    kernel2 = cu.getKernel(module, "integrateDrudeLangevinPart2");
+    hardwallKernel = cu.getKernel(module, "applyHardWallConstraints");
    prevStepSize = -1.0;
 }
@@ -296,6 +297,8 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
    double vscaleDrude = exp(-stepSize*integrator.getDrudeFriction());
    double fscaleDrude = (1-vscaleDrude)/integrator.getDrudeFriction()/(double) 0x100000000;
    double noisescaleDrude = sqrt(2*BOLTZ*integrator.getDrudeTemperature()*integrator.getDrudeFriction())*sqrt(0.5*(1-vscaleDrude*vscaleDrude)/integrator.getDrudeFriction());
+    double maxDrudeDistance = integrator.getMaxDrudeDistance();
+    double hardwallscaleDrude = sqrt(BOLTZ*integrator.getDrudeTemperature());
    if (stepSize != prevStepSize) {
        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
            double2 ss = make_double2(0, stepSize);
@@ -316,7 +319,9 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
    float vscaleDrudeFloat = (float) vscaleDrude;
    float fscaleDrudeFloat = (float) fscaleDrude;
    float noisescaleDrudeFloat = (float) noisescaleDrude;
-    void *vscalePtr, *fscalePtr, *noisescalePtr, *vscaleDrudePtr, *fscaleDrudePtr, *noisescaleDrudePtr;
+    float maxDrudeDistanceFloat =(float) maxDrudeDistance;
+    float hardwallscaleDrudeFloat = (float) hardwallscaleDrude;
+    void *vscalePtr, *fscalePtr, *noisescalePtr, *vscaleDrudePtr, *fscaleDrudePtr, *noisescaleDrudePtr, *maxDrudeDistancePtr, *hardwallscaleDrudePtr;
    if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
        vscalePtr = &vscale;
        fscalePtr = &fscale;
@@ -324,6 +329,8 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
        vscaleDrudePtr = &vscaleDrude;
        fscaleDrudePtr = &fscaleDrude;
        noisescaleDrudePtr = &noisescaleDrude;
+        maxDrudeDistancePtr = &maxDrudeDistance;
+        hardwallscaleDrudePtr = &hardwallscaleDrude;
    }
    else {
        vscalePtr = &vscaleFloat;
@@ -332,6 +339,8 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
        vscaleDrudePtr = &vscaleDrudeFloat;
        fscaleDrudePtr = &fscaleDrudeFloat;
        noisescaleDrudePtr = &noisescaleDrudeFloat;
+        maxDrudeDistancePtr = &maxDrudeDistanceFloat;
+        hardwallscaleDrudePtr = &hardwallscaleDrudeFloat;
    }
    // Call the first integration kernel.
@@ -352,6 +361,12 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
    void* args2[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &integration.getPosDelta().getDevicePointer(),
            &cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
    cu.executeKernel(kernel2, args2, numAtoms);
+    // Apply hard wall constraints.
+    void* hardwallArgs[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(),
+            &pairParticles->getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr};
+    cu.executeKernel(hardwallKernel, hardwallArgs, pairParticles->getSize());
    integration.computeVirtualSites();
    // Update the time and step count.

--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -113,7 +113,7 @@ private:
    double prevStepSize;
    CudaArray* normalParticles;
    CudaArray* pairParticles;
-    CUfunction kernel1, kernel2;
+    CUfunction kernel1, kernel2, hardwallKernel;
 };
 /**

--- a/plugins/drude/platforms/cuda/src/kernels/drudeLangevin.cu
+++ b/plugins/drude/platforms/cuda/src/kernels/drudeLangevin.cu
@@ -101,3 +101,111 @@ extern "C" __global__ void integrateDrudeLangevinPart2(real4* __restrict__ posq,
        index += blockDim.x*gridDim.x;
    }
 }
+/**
+ * Apply hard wall constraints
+ */
+extern "C" __global__ void applyHardWallConstraints(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ velm,
+        const int2* __restrict__ pairParticles, const mixed2* __restrict__ dt, mixed maxDrudeDistance, mixed hardwallscaleDrude) {
+    mixed stepSize = dt[0].y;
+    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_PAIRS; i += blockDim.x*gridDim.x) {
+        int2 particles = pairParticles[i];
+#ifdef USE_MIXED_PRECISION
+        real4 posReal1 = posq[particles.x];
+        real4 posReal2 = posq[particles.y];
+        real4 posCorr1 = posqCorrection[particles.x];
+        real4 posCorr2 = posqCorrection[particles.y];
+        mixed4 pos1 = make_mixed4(posReal1.x+(mixed)posCorr1.x, posReal1.y+(mixed)posCorr1.y, posReal1.z+(mixed)posCorr1.z, posReal1.w);
+        mixed4 pos2 = make_mixed4(posReal2.x+(mixed)posCorr2.x, posReal2.y+(mixed)posCorr2.y, posReal2.z+(mixed)posCorr2.z, posReal2.w);
+#else
+        mixed4 pos1 = posq[particles.x];
+        mixed4 pos2 = posq[particles.y];
+#endif
+        mixed4 delta = pos1-pos2;
+        mixed r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
+        mixed rInv = RECIP(r);
+        if (rInv*maxDrudeDistance < 1) {
+            // The constraint has been violated, so make the inter-particle distance "bounce"
+            // off the hard wall.
+            mixed4 bondDir = delta*rInv;
+            mixed4 vel1 = velm[particles.x];
+            mixed4 vel2 = velm[particles.y];
+            mixed mass1 = RECIP(vel1.w);
+            mixed mass2 = RECIP(vel2.w);
+            mixed deltaR = r-maxDrudeDistance;
+            mixed deltaT = stepSize;
+            mixed dotvr1 = vel1.x*bondDir.x + vel1.y*bondDir.y + vel1.z*bondDir.z;
+            mixed4 vb1 = bondDir*dotvr1;
+            mixed4 vp1 = vel1-vb1;
+            if (vel2.w == 0) {
+                // The parent particle is massless, so move only the Drude particle.
+                if (dotvr1 != 0)
+                    deltaT = deltaR/fabs(dotvr1);
+                if (deltaT > stepSize)
+                    deltaT = stepSize;
+                dotvr1 = -dotvr1*hardwallscaleDrude/(fabs(dotvr1)*SQRT(mass1));
+                mixed dr = -deltaR + deltaT*dotvr1;
+                pos1.x += bondDir.x*dr;
+                pos1.y += bondDir.y*dr;
+                pos1.z += bondDir.z*dr;
+#ifdef USE_MIXED_PRECISION
+                posq[particles.x] = make_real4((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
+                posqCorrection[particles.x] = make_real4(pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
+#else
+                posq[particles.x] = pos1;
+#endif
+                vel1.x = vp1.x + bondDir.x*dotvr1;
+                vel1.y = vp1.y + bondDir.y*dotvr1;
+                vel1.z = vp1.z + bondDir.z*dotvr1;
+                velm[particles.x] = vel1;
+            }
+            else {
+                // Move both particles.
+                mixed invTotalMass = RECIP(mass1+mass2);
+                mixed dotvr2 = vel2.x*bondDir.x + vel2.y*bondDir.y + vel2.z*bondDir.z;
+                mixed4 vb2 = bondDir*dotvr2;
+                mixed4 vp2 = vel2-vb2;
+                mixed vbCMass = (mass1*dotvr1 + mass2*dotvr2)*invTotalMass;
+                dotvr1 -= vbCMass;
+                dotvr2 -= vbCMass;
+                if (dotvr1 != dotvr2)
+                    deltaT = deltaR/fabs(dotvr1-dotvr2);
+                if (deltaT > stepSize)
+                    deltaT = stepSize;
+                mixed vBond = hardwallscaleDrude/SQRT(mass1);
+                dotvr1 = -dotvr1*vBond*mass2*invTotalMass/fabs(dotvr1);
+                dotvr2 = -dotvr2*vBond*mass1*invTotalMass/fabs(dotvr2);
+                mixed dr1 = -deltaR*mass2*invTotalMass + deltaT*dotvr1;
+                mixed dr2 = deltaR*mass1*invTotalMass + deltaT*dotvr2;
+                dotvr1 += vbCMass;
+                dotvr2 += vbCMass;
+                pos1.x += bondDir.x*dr1;
+                pos1.y += bondDir.y*dr1;
+                pos1.z += bondDir.z*dr1;
+                pos2.x += bondDir.x*dr2;
+                pos2.y += bondDir.y*dr2;
+                pos2.z += bondDir.z*dr2;
+#ifdef USE_MIXED_PRECISION
+                posq[particles.x] = make_real4((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
+                posq[particles.y] = make_real4((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w);
+                posqCorrection[particles.x] = make_real4(pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
+                posqCorrection[particles.y] = make_real4(pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0);
+#else
+                posq[particles.x] = pos1;
+                posq[particles.y] = pos2;
+#endif
+                vel1.x = vp1.x + bondDir.x*dotvr1;
+                vel1.y = vp1.y + bondDir.y*dotvr1;
+                vel1.z = vp1.z + bondDir.z*dotvr1;
+                vel2.x = vp2.x + bondDir.x*dotvr2;
+                vel2.y = vp2.y + bondDir.y*dotvr2;
+                vel2.z = vp2.z + bondDir.z*dotvr2;
+                velm[particles.x] = vel1;
+                velm[particles.y] = vel2;
+            }
+        }
+    }
+}
--- a/plugins/drude/platforms/cuda/tests/TestCudaDrudeLangevinIntegrator.cpp
+++ b/plugins/drude/platforms/cuda/tests/TestCudaDrudeLangevinIntegrator.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -60,6 +60,7 @@ void testSinglePair() {
    const double mass2 = 0.1;
    const double totalMass = mass1+mass2;
    const double reducedMass = (mass1*mass2)/(mass1+mass2);
+    const double maxDistance = 0.05;
    System system;
    system.addParticle(mass1);
    system.addParticle(mass2);
@@ -70,6 +71,7 @@ void testSinglePair() {
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(0, 0, 0);
    DrudeLangevinIntegrator integ(temperature, 20.0, temperatureDrude, 20.0, 0.003);
+    integ.setMaxDrudeDistance(maxDistance);
    Platform& platform = Platform::getPlatformByName("CUDA");
    Context context(system, integ, platform);
    context.setPositions(positions);
@@ -84,12 +86,15 @@ void testSinglePair() {
    int numSteps = 10000;
    for (int i = 0; i < numSteps; i++) {
        integ.step(10);
-        State state = context.getState(State::Velocities);
+        State state = context.getState(State::Velocities | State::Positions);
        const vector<Vec3>& vel = state.getVelocities();
        Vec3 velCM = vel[0]*(mass1/totalMass) + vel[1]*(mass2/totalMass);
        keCM += 0.5*totalMass*velCM.dot(velCM);
        Vec3 velInternal = vel[0]-vel[1];
        keInternal += 0.5*reducedMass*velInternal.dot(velInternal);
+        Vec3 delta = state.getPositions()[0]-state.getPositions()[1];
+        double distance = sqrt(delta.dot(delta));
+        ASSERT(distance <= maxDistance*(1+1e-6));
    }
    ASSERT_USUALLY_EQUAL_TOL(3*0.5*BOLTZ*temperature, keCM/numSteps, 0.1);
    ASSERT_USUALLY_EQUAL_TOL(3*0.5*BOLTZ*temperatureDrude, keInternal/numSteps, 0.01);

--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -284,6 +284,7 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
    cl::Program program = cl.createProgram(OpenCLDrudeKernelSources::drudeLangevin, defines, "");
    kernel1 = cl::Kernel(program, "integrateDrudeLangevinPart1");
    kernel2 = cl::Kernel(program, "integrateDrudeLangevinPart2");
+    hardwallKernel = cl::Kernel(program, "applyHardWallConstraints");
    prevStepSize = -1.0;
 }
@@ -307,6 +308,14 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
        kernel2.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
        kernel2.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
+        hardwallKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
+        if (cl.getUseMixedPrecision())
+            hardwallKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
+        else
+            hardwallKernel.setArg<void*>(1, NULL);
+        hardwallKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
+        hardwallKernel.setArg<cl::Buffer>(3, pairParticles->getDeviceBuffer());
+        hardwallKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
    }
    // Compute integrator coefficients.
@@ -318,6 +327,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
    double vscaleDrude = exp(-stepSize*integrator.getDrudeFriction());
    double fscaleDrude = (1-vscaleDrude)/integrator.getDrudeFriction();
    double noisescaleDrude = sqrt(2*BOLTZ*integrator.getDrudeTemperature()*integrator.getDrudeFriction())*sqrt(0.5*(1-vscaleDrude*vscaleDrude)/integrator.getDrudeFriction());
+    double maxDrudeDistance = integrator.getMaxDrudeDistance();
+    double hardwallscaleDrude = sqrt(BOLTZ*integrator.getDrudeTemperature());
    if (stepSize != prevStepSize) {
        if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
            mm_double2 ss = mm_double2(0, stepSize);
@@ -336,6 +347,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
            kernel1.setArg<cl_double>(9, vscaleDrude);
            kernel1.setArg<cl_double>(10, fscaleDrude);
            kernel1.setArg<cl_double>(11, noisescaleDrude);
+            hardwallKernel.setArg<cl_double>(5, maxDrudeDistance);
+            hardwallKernel.setArg<cl_double>(6, hardwallscaleDrude);
    }
    else {
            kernel1.setArg<cl_float>(6, (cl_float) vscale);
@@ -344,6 +357,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
            kernel1.setArg<cl_float>(9, (cl_float) vscaleDrude);
            kernel1.setArg<cl_float>(10, (cl_float) fscaleDrude);
            kernel1.setArg<cl_float>(11, (cl_float) noisescaleDrude);
+            hardwallKernel.setArg<cl_float>(5, (cl_float) maxDrudeDistance);
+            hardwallKernel.setArg<cl_float>(6, (cl_float) hardwallscaleDrude);
    }
    // Call the first integration kernel.
@@ -358,6 +373,10 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
    // Call the second integration kernel.
    cl.executeKernel(kernel2, numAtoms);
+    // Apply hard wall constraints.
+    cl.executeKernel(hardwallKernel, pairParticles->getSize());
    integration.computeVirtualSites();
    // Update the time and step count.

--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -114,7 +114,7 @@ private:
    double prevStepSize;
    OpenCLArray* normalParticles;
    OpenCLArray* pairParticles;
-    cl::Kernel kernel1, kernel2;
+    cl::Kernel kernel1, kernel2, hardwallKernel;
 };
 /**

--- a/plugins/drude/platforms/opencl/src/kernels/drudeLangevin.cl
+++ b/plugins/drude/platforms/opencl/src/kernels/drudeLangevin.cl
@@ -101,3 +101,99 @@ __kernel void integrateDrudeLangevinPart2(__global real4* restrict posq, __globa
        index += get_global_size(0);
    }
 }
+/**
+ * Apply hard wall constraints
+ */
+__kernel void applyHardWallConstraints(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm,
+        __global const int2* restrict pairParticles, __global const mixed2* restrict dt, mixed maxDrudeDistance, mixed hardwallscaleDrude) {
+    mixed stepSize = dt[0].y;
+    for (int i = get_global_id(0); i < NUM_PAIRS; i += get_global_size(0)) {
+        int2 particles = pairParticles[i];
+#ifdef USE_MIXED_PRECISION
+        real4 posReal1 = posq[particles.x];
+        real4 posReal2 = posq[particles.y];
+        real4 posCorr1 = posqCorrection[particles.x];
+        real4 posCorr2 = posqCorrection[particles.y];
+        mixed4 pos1 = (mixed4) (posReal1.x+(mixed)posCorr1.x, posReal1.y+(mixed)posCorr1.y, posReal1.z+(mixed)posCorr1.z, posReal1.w);
+        mixed4 pos2 = (mixed4) (posReal2.x+(mixed)posCorr2.x, posReal2.y+(mixed)posCorr2.y, posReal2.z+(mixed)posCorr2.z, posReal2.w);
+#else
+        mixed4 pos1 = posq[particles.x];
+        mixed4 pos2 = posq[particles.y];
+#endif
+        mixed4 delta = pos1-pos2;
+        mixed r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
+        mixed rInv = 1/r;
+        if (rInv*maxDrudeDistance < 1) {
+            // The constraint has been violated, so make the inter-particle distance "bounce"
+            // off the hard wall.
+            mixed4 bondDir = delta*rInv;
+            mixed4 vel1 = velm[particles.x];
+            mixed4 vel2 = velm[particles.y];
+            mixed mass1 = 1/vel1.w;
+            mixed mass2 = 1/vel2.w;
+            mixed deltaR = r-maxDrudeDistance;
+            mixed deltaT = stepSize;
+            mixed dotvr1 = vel1.x*bondDir.x + vel1.y*bondDir.y + vel1.z*bondDir.z;
+            mixed4 vb1 = bondDir*dotvr1;
+            mixed4 vp1 = vel1-vb1;
+            if (vel2.w == 0) {
+                // The parent particle is massless, so move only the Drude particle.
+                if (dotvr1 != 0)
+                    deltaT = deltaR/fabs(dotvr1);
+                if (deltaT > stepSize)
+                    deltaT = stepSize;
+                dotvr1 = -dotvr1*hardwallscaleDrude/(fabs(dotvr1)*sqrt(mass1));
+                mixed dr = -deltaR + deltaT*dotvr1;
+                pos1.xyz += bondDir.xyz*dr;
+#ifdef USE_MIXED_PRECISION
+                posq[particles.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
+                posqCorrection[particles.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
+#else
+                posq[particles.x] = pos1;
+#endif
+                vel1.xyz = vp1.xyz + bondDir.xyz*dotvr1;
+                velm[particles.x] = vel1;
+            }
+            else {
+                // Move both particles.
+                mixed invTotalMass = 1/(mass1+mass2);
+                mixed dotvr2 = vel2.x*bondDir.x + vel2.y*bondDir.y + vel2.z*bondDir.z;
+                mixed4 vb2 = bondDir*dotvr2;
+                mixed4 vp2 = vel2-vb2;
+                mixed vbCMass = (mass1*dotvr1 + mass2*dotvr2)*invTotalMass;
+                dotvr1 -= vbCMass;
+                dotvr2 -= vbCMass;
+                if (dotvr1 != dotvr2)
+                    deltaT = deltaR/fabs(dotvr1-dotvr2);
+                if (deltaT > stepSize)
+                    deltaT = stepSize;
+                mixed vBond = hardwallscaleDrude/sqrt(mass1);
+                dotvr1 = -dotvr1*vBond*mass2*invTotalMass/fabs(dotvr1);
+                dotvr2 = -dotvr2*vBond*mass1*invTotalMass/fabs(dotvr2);
+                mixed dr1 = -deltaR*mass2*invTotalMass + deltaT*dotvr1;
+                mixed dr2 = deltaR*mass1*invTotalMass + deltaT*dotvr2;
+                dotvr1 += vbCMass;
+                dotvr2 += vbCMass;
+                pos1.xyz += bondDir.xyz*dr1;
+                pos2.xyz += bondDir.xyz*dr2;
+#ifdef USE_MIXED_PRECISION
+                posq[particles.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
+                posq[particles.y] = (real4) ((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w);
+                posqCorrection[particles.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
+                posqCorrection[particles.y] = (real4) (pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0);
+#else
+                posq[particles.x] = pos1;
+                posq[particles.y] = pos2;
+#endif
+                vel1.xyz = vp1.xyz + bondDir.xyz*dotvr1;
+                vel2.xyz = vp2.xyz + bondDir.xyz*dotvr2;
+                velm[particles.x] = vel1;
+                velm[particles.y] = vel2;
+            }
+        }
+    }
+}
--- a/plugins/drude/platforms/opencl/tests/TestOpenCLDrudeLangevinIntegrator.cpp
+++ b/plugins/drude/platforms/opencl/tests/TestOpenCLDrudeLangevinIntegrator.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -60,6 +60,7 @@ void testSinglePair() {
    const double mass2 = 0.1;
    const double totalMass = mass1+mass2;
    const double reducedMass = (mass1*mass2)/(mass1+mass2);
+    const double maxDistance = 0.05;
    System system;
    system.addParticle(mass1);
    system.addParticle(mass2);
@@ -70,6 +71,7 @@ void testSinglePair() {
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(0, 0, 0);
    DrudeLangevinIntegrator integ(temperature, 20.0, temperatureDrude, 20.0, 0.003);
+    integ.setMaxDrudeDistance(maxDistance);
    Platform& platform = Platform::getPlatformByName("OpenCL");
    Context context(system, integ, platform);
    context.setPositions(positions);
@@ -84,12 +86,15 @@ void testSinglePair() {
    int numSteps = 10000;
    for (int i = 0; i < numSteps; i++) {
        integ.step(10);
-        State state = context.getState(State::Velocities);
+        State state = context.getState(State::Velocities | State::Positions);
        const vector<Vec3>& vel = state.getVelocities();
        Vec3 velCM = vel[0]*(mass1/totalMass) + vel[1]*(mass2/totalMass);
        keCM += 0.5*totalMass*velCM.dot(velCM);
        Vec3 velInternal = vel[0]-vel[1];
        keInternal += 0.5*reducedMass*velInternal.dot(velInternal);
+        Vec3 delta = state.getPositions()[0]-state.getPositions()[1];
+        double distance = sqrt(delta.dot(delta));
+        ASSERT(distance <= maxDistance*(1+1e-6));
    }
    ASSERT_USUALLY_EQUAL_TOL(3*0.5*BOLTZ*temperature, keCM/numSteps, 0.1);
    ASSERT_USUALLY_EQUAL_TOL(3*0.5*BOLTZ*temperatureDrude, keInternal/numSteps, 0.01);