Merge https://github.com/openmm/openmm

5a06df78 · tic20 · 8dd60914 · a9223eea · 8dd60914 · 8dd60914
Commit 5a06df78 authored Mar 04, 2020 by tic20
20 changed files
--- a/platforms/opencl/src/kernels/rmsd.cl
+++ b/platforms/opencl/src/kernels/rmsd.cl
-// This file contains kernels to compute the RMSD and its gradient using the algorithm described
-// in Coutsias et al, "Using quaternions to calculate RMSD" (doi: 10.1002/jcc.20110).
-/**
- * Sum a value over all threads.
- */
-real reduceValue(real value, __local volatile real* temp) {
-    const int thread = get_local_id(0);
-    barrier(CLK_LOCAL_MEM_FENCE);
-    temp[thread] = value;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    for (uint step = 1; step < 32; step *= 2) {
-        if (thread+step < get_local_size(0) && thread%(2*step) == 0)
-            temp[thread] = temp[thread] + temp[thread+step];
-        SYNC_WARPS;
-    }
-    for (uint step = 32; step < get_local_size(0); step *= 2) {
-        if (thread+step < get_local_size(0) && thread%(2*step) == 0)
-            temp[thread] = temp[thread] + temp[thread+step];
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-    return temp[0];
-}
-/**
- * Perform the first step of computing the RMSD.  This is executed as a single work group.
- */
-__kernel void computeRMSDPart1(int numParticles, __global const real4* restrict posq, __global const real4* restrict referencePos,
-        __global const int* restrict particles, __global real* buffer, __local volatile real* restrict temp) {
-    // Compute the center of the particle positions.
-    real3 center = (real3) 0;
-    for (int i = get_local_id(0); i < numParticles; i += get_local_size(0))
-        center += posq[particles[i]].xyz;
-    center.x = reduceValue(center.x, temp)/numParticles;
-    center.y = reduceValue(center.y, temp)/numParticles;
-    center.z = reduceValue(center.z, temp)/numParticles;
-    // Compute the correlation matrix.
-    real R[3][3] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
-    real sum = 0;
-    for (int i = get_local_id(0); i < numParticles; i += get_local_size(0)) {
-        int index = particles[i];
-        real3 pos = posq[index].xyz - center;
-        real3 refPos = referencePos[index].xyz;
-        R[0][0] += pos.x*refPos.x;
-        R[0][1] += pos.x*refPos.y;
-        R[0][2] += pos.x*refPos.z;
-        R[1][0] += pos.y*refPos.x;
-        R[1][1] += pos.y*refPos.y;
-        R[1][2] += pos.y*refPos.z;
-        R[2][0] += pos.z*refPos.x;
-        R[2][1] += pos.z*refPos.y;
-        R[2][2] += pos.z*refPos.z;
-        sum += dot(pos, pos);
-    }
-    for (int i = 0; i < 3; i++)
-        for (int j = 0; j < 3; j++)
-            R[i][j] = reduceValue(R[i][j], temp);
-    sum = reduceValue(sum, temp);
-    // Copy everything into the output buffer to send back to the host.
-    if (get_local_id(0) == 0) {
-        for (int i = 0; i < 3; i++)
-            for (int j = 0; j < 3; j++)
-                buffer[3*i+j] = R[i][j];
-        buffer[9] = sum;
-        buffer[10] = center.x;
-        buffer[11] = center.y;
-        buffer[12] = center.z;
-    }
-}
-/**
- * Apply forces based on the RMSD.
- */
-__kernel void computeRMSDForces(int numParticles, __global const real4* restrict posq, __global const real4* restrict referencePos,
-        __global const int* restrict particles, __global const real* buffer, __global real4* restrict forceBuffers) {
-    real3 center = (real3) (buffer[10], buffer[11], buffer[12]);
-    real scale = 1 / (real) (buffer[9]*numParticles);
-    for (int i = get_global_id(0); i < numParticles; i += get_global_size(0)) {
-        int index = particles[i];
-        real3 pos = posq[index].xyz - center;
-        real3 refPos = referencePos[index].xyz;
-        real3 rotatedRef = (real3) (buffer[0]*refPos.x + buffer[3]*refPos.y + buffer[6]*refPos.z,
-                                    buffer[1]*refPos.x + buffer[4]*refPos.y + buffer[7]*refPos.z,
-                                    buffer[2]*refPos.x + buffer[5]*refPos.y + buffer[8]*refPos.z);
-        forceBuffers[index].xyz -= (pos-rotatedRef)*scale;
-    }
-}
--- a/platforms/opencl/src/kernels/settle.cl
+++ b/platforms/opencl/src/kernels/settle.cl
-mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
-#ifdef USE_MIXED_PRECISION
-    real4 pos1 = posq[index];
-    real4 pos2 = posqCorrection[index];
-    return (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-#else
-    return posq[index];
-#endif
-}
-/**
- * Enforce constraints on SETTLE clusters
- */
-__kernel void applySettle(int numClusters, mixed tol, __global const real4* restrict oldPos, __global const real4* restrict posCorrection, __global mixed4* restrict posDelta, __global const mixed4* restrict velm, __global const int4* restrict clusterAtoms, __global const float2* restrict clusterParams) {
-    int index = get_global_id(0);
-    while (index < numClusters) {
-        // Load the data for this cluster.
-        int4 atoms = clusterAtoms[index];
-        float2 params = clusterParams[index];
-        mixed4 apos0 = loadPos(oldPos, posCorrection, atoms.x);
-        mixed4 xp0 = posDelta[atoms.x];
-        mixed4 apos1 = loadPos(oldPos, posCorrection, atoms.y);
-        mixed4 xp1 = posDelta[atoms.y];
-        mixed4 apos2 = loadPos(oldPos, posCorrection, atoms.z);
-        mixed4 xp2 = posDelta[atoms.z];
-        mixed m0 = 1/velm[atoms.x].w;
-        mixed m1 = 1/velm[atoms.y].w;
-        mixed m2 = 1/velm[atoms.z].w;
-        // Apply the SETTLE algorithm.
-        mixed xb0 = apos1.x-apos0.x;
-        mixed yb0 = apos1.y-apos0.y;
-        mixed zb0 = apos1.z-apos0.z;
-        mixed xc0 = apos2.x-apos0.x;
-        mixed yc0 = apos2.y-apos0.y;
-        mixed zc0 = apos2.z-apos0.z;
-        mixed invTotalMass = 1.0f/(m0+m1+m2);
-        mixed xcom = (xp0.x*m0 + (xb0+xp1.x)*m1 + (xc0+xp2.x)*m2) * invTotalMass;
-        mixed ycom = (xp0.y*m0 + (yb0+xp1.y)*m1 + (yc0+xp2.y)*m2) * invTotalMass;
-        mixed zcom = (xp0.z*m0 + (zb0+xp1.z)*m1 + (zc0+xp2.z)*m2) * invTotalMass;
-        mixed xa1 = xp0.x - xcom;
-        mixed ya1 = xp0.y - ycom;
-        mixed za1 = xp0.z - zcom;
-        mixed xb1 = xb0 + xp1.x - xcom;
-        mixed yb1 = yb0 + xp1.y - ycom;
-        mixed zb1 = zb0 + xp1.z - zcom;
-        mixed xc1 = xc0 + xp2.x - xcom;
-        mixed yc1 = yc0 + xp2.y - ycom;
-        mixed zc1 = zc0 + xp2.z - zcom;
-        mixed xaksZd = yb0*zc0 - zb0*yc0;
-        mixed yaksZd = zb0*xc0 - xb0*zc0;
-        mixed zaksZd = xb0*yc0 - yb0*xc0;
-        mixed xaksXd = ya1*zaksZd - za1*yaksZd;
-        mixed yaksXd = za1*xaksZd - xa1*zaksZd;
-        mixed zaksXd = xa1*yaksZd - ya1*xaksZd;
-        mixed xaksYd = yaksZd*zaksXd - zaksZd*yaksXd;
-        mixed yaksYd = zaksZd*xaksXd - xaksZd*zaksXd;
-        mixed zaksYd = xaksZd*yaksXd - yaksZd*xaksXd;
-        mixed axlng = sqrt(xaksXd*xaksXd + yaksXd*yaksXd + zaksXd*zaksXd);
-        mixed aylng = sqrt(xaksYd*xaksYd + yaksYd*yaksYd + zaksYd*zaksYd);
-        mixed azlng = sqrt(xaksZd*xaksZd + yaksZd*yaksZd + zaksZd*zaksZd);
-        mixed trns11 = xaksXd / axlng;
-        mixed trns21 = yaksXd / axlng;
-        mixed trns31 = zaksXd / axlng;
-        mixed trns12 = xaksYd / aylng;
-        mixed trns22 = yaksYd / aylng;
-        mixed trns32 = zaksYd / aylng;
-        mixed trns13 = xaksZd / azlng;
-        mixed trns23 = yaksZd / azlng;
-        mixed trns33 = zaksZd / azlng;
-        mixed xb0d = trns11*xb0 + trns21*yb0 + trns31*zb0;
-        mixed yb0d = trns12*xb0 + trns22*yb0 + trns32*zb0;
-        mixed xc0d = trns11*xc0 + trns21*yc0 + trns31*zc0;
-        mixed yc0d = trns12*xc0 + trns22*yc0 + trns32*zc0;
-        mixed za1d = trns13*xa1 + trns23*ya1 + trns33*za1;
-        mixed xb1d = trns11*xb1 + trns21*yb1 + trns31*zb1;
-        mixed yb1d = trns12*xb1 + trns22*yb1 + trns32*zb1;
-        mixed zb1d = trns13*xb1 + trns23*yb1 + trns33*zb1;
-        mixed xc1d = trns11*xc1 + trns21*yc1 + trns31*zc1;
-        mixed yc1d = trns12*xc1 + trns22*yc1 + trns32*zc1;
-        mixed zc1d = trns13*xc1 + trns23*yc1 + trns33*zc1;
-        //                                        --- Step2  A2' ---
-        float rc = 0.5*params.y;
-        mixed rb = sqrt(params.x*params.x-rc*rc);
-        mixed ra = rb*(m1+m2)*invTotalMass;
-        rb -= ra;
-        mixed sinphi = za1d / ra;
-        mixed cosphi = sqrt(1.0f - sinphi*sinphi);
-        mixed sinpsi = (zb1d - zc1d) / (2*rc*cosphi);
-        mixed cospsi = sqrt(1.0f - sinpsi*sinpsi);
-        mixed ya2d =   ra*cosphi;
-        mixed xb2d = - rc*cospsi;
-        mixed yb2d = - rb*cosphi - rc*sinpsi*sinphi;
-        mixed yc2d = - rb*cosphi + rc*sinpsi*sinphi;
-        mixed xb2d2 = xb2d*xb2d;
-        mixed hh2 = 4.0f*xb2d2 + (yb2d-yc2d)*(yb2d-yc2d) + (zb1d-zc1d)*(zb1d-zc1d);
-        mixed deltx = 2.0f*xb2d + sqrt(4.0f*xb2d2 - hh2 + params.y*params.y);
-        xb2d -= deltx*0.5;
-        //                                        --- Step3  al,be,ga ---
-        mixed alpha = (xb2d*(xb0d-xc0d) + yb0d*yb2d + yc0d*yc2d);
-        mixed beta = (xb2d*(yc0d-yb0d) + xb0d*yb2d + xc0d*yc2d);
-        mixed gamma = xb0d*yb1d - xb1d*yb0d + xc0d*yc1d - xc1d*yc0d;
-        mixed al2be2 = alpha*alpha + beta*beta;
-        mixed sintheta = (alpha*gamma - beta*sqrt(al2be2 - gamma*gamma)) / al2be2;
-        //                                        --- Step4  A3' ---
-        mixed costheta = sqrt(1.0f - sintheta*sintheta);
-        mixed xa3d = - ya2d*sintheta;
-        mixed ya3d =   ya2d*costheta;
-        mixed za3d = za1d;
-        mixed xb3d =   xb2d*costheta - yb2d*sintheta;
-        mixed yb3d =   xb2d*sintheta + yb2d*costheta;
-        mixed zb3d = zb1d;
-        mixed xc3d = - xb2d*costheta - yc2d*sintheta;
-        mixed yc3d = - xb2d*sintheta + yc2d*costheta;
-        mixed zc3d = zc1d;
-        //                                        --- Step5  A3 ---
-        mixed xa3 = trns11*xa3d + trns12*ya3d + trns13*za3d;
-        mixed ya3 = trns21*xa3d + trns22*ya3d + trns23*za3d;
-        mixed za3 = trns31*xa3d + trns32*ya3d + trns33*za3d;
-        mixed xb3 = trns11*xb3d + trns12*yb3d + trns13*zb3d;
-        mixed yb3 = trns21*xb3d + trns22*yb3d + trns23*zb3d;
-        mixed zb3 = trns31*xb3d + trns32*yb3d + trns33*zb3d;
-        mixed xc3 = trns11*xc3d + trns12*yc3d + trns13*zc3d;
-        mixed yc3 = trns21*xc3d + trns22*yc3d + trns23*zc3d;
-        mixed zc3 = trns31*xc3d + trns32*yc3d + trns33*zc3d;
-        xp0.x = xcom + xa3;
-        xp0.y = ycom + ya3;
-        xp0.z = zcom + za3;
-        xp1.x = xcom + xb3 - xb0;
-        xp1.y = ycom + yb3 - yb0;
-        xp1.z = zcom + zb3 - zb0;
-        xp2.x = xcom + xc3 - xc0;
-        xp2.y = ycom + yc3 - yc0;
-        xp2.z = zcom + zc3 - zc0;
-        // Record the new positions.
-        posDelta[atoms.x] = xp0;
-        posDelta[atoms.y] = xp1;
-        posDelta[atoms.z] = xp2;
-        index += get_global_size(0);
-    }
-}
-/**
- * Enforce velocity constraints on SETTLE clusters
- */
-__kernel void constrainVelocities(int numClusters, mixed tol, __global const real4* restrict oldPos, __global const real4* restrict posCorrection, __global mixed4* restrict posDelta, __global mixed4* restrict velm, __global const int4* restrict clusterAtoms, __global const float2* restrict clusterParams) {
-    for (int index = get_global_id(0); index < numClusters; index += get_global_size(0)) {
-        // Load the data for this cluster.
-        int4 atoms = clusterAtoms[index];
-        mixed4 apos0 = loadPos(oldPos, posCorrection, atoms.x);
-        mixed4 apos1 = loadPos(oldPos, posCorrection, atoms.y);
-        mixed4 apos2 = loadPos(oldPos, posCorrection, atoms.z);
-        mixed4 v0 = velm[atoms.x];
-        mixed4 v1 = velm[atoms.y];
-        mixed4 v2 = velm[atoms.z];
-        // Compute intermediate quantities: the atom masses, the bond directions, the relative velocities,
-        // and the angle cosines and sines.
-        mixed mA = 1/v0.w;
-        mixed mB = 1/v1.w;
-        mixed mC = 1/v2.w;
-        mixed4 eAB = apos1-apos0;
-        mixed4 eBC = apos2-apos1;
-        mixed4 eCA = apos0-apos2;
-        eAB.xyz /= sqrt(eAB.x*eAB.x + eAB.y*eAB.y + eAB.z*eAB.z);
-        eBC.xyz /= sqrt(eBC.x*eBC.x + eBC.y*eBC.y + eBC.z*eBC.z);
-        eCA.xyz /= sqrt(eCA.x*eCA.x + eCA.y*eCA.y + eCA.z*eCA.z);
-        mixed vAB = (v1.x-v0.x)*eAB.x + (v1.y-v0.y)*eAB.y + (v1.z-v0.z)*eAB.z;
-        mixed vBC = (v2.x-v1.x)*eBC.x + (v2.y-v1.y)*eBC.y + (v2.z-v1.z)*eBC.z;
-        mixed vCA = (v0.x-v2.x)*eCA.x + (v0.y-v2.y)*eCA.y + (v0.z-v2.z)*eCA.z;
-        mixed cA = -(eAB.x*eCA.x + eAB.y*eCA.y + eAB.z*eCA.z);
-        mixed cB = -(eAB.x*eBC.x + eAB.y*eBC.y + eAB.z*eBC.z);
-        mixed cC = -(eBC.x*eCA.x + eBC.y*eCA.y + eBC.z*eCA.z);
-        mixed s2A = 1-cA*cA;
-        mixed s2B = 1-cB*cB;
-        mixed s2C = 1-cC*cC;
-        // Solve the equations.  These are different from those in the SETTLE paper (JCC 13(8), pp. 952-962, 1992), because
-        // in going from equations B1 to B2, they make the assumption that mB=mC (but don't bother to mention they're
-        // making that assumption).  We allow all three atoms to have different masses.
-        mixed mABCinv = 1/(mA*mB*mC);
-        mixed denom = (((s2A*mB+s2B*mA)*mC+(s2A*mB*mB+2*(cA*cB*cC+1)*mA*mB+s2B*mA*mA))*mC+s2C*mA*mB*(mA+mB))*mABCinv;
-        mixed tab = ((cB*cC*mA-cA*mB-cA*mC)*vCA + (cA*cC*mB-cB*mC-cB*mA)*vBC + (s2C*mA*mA*mB*mB*mABCinv+(mA+mB+mC))*vAB)/denom;
-        mixed tbc = ((cA*cB*mC-cC*mB-cC*mA)*vCA + (s2A*mB*mB*mC*mC*mABCinv+(mA+mB+mC))*vBC + (cA*cC*mB-cB*mA-cB*mC)*vAB)/denom;
-        mixed tca = ((s2B*mA*mA*mC*mC*mABCinv+(mA+mB+mC))*vCA + (cA*cB*mC-cC*mB-cC*mA)*vBC + (cB*cC*mA-cA*mB-cA*mC)*vAB)/denom;
-        v0.xyz += (tab*eAB.xyz - tca*eCA.xyz)*v0.w;
-        v1.xyz += (tbc*eBC.xyz - tab*eAB.xyz)*v1.w;
-        v2.xyz += (tca*eCA.xyz - tbc*eBC.xyz)*v2.w;
-        velm[atoms.x] = v0;
-        velm[atoms.y] = v1;
-        velm[atoms.z] = v2;
-    }
-}
\ No newline at end of file
--- a/platforms/opencl/src/kernels/shakeHydrogens.cl
+++ b/platforms/opencl/src/kernels/shakeHydrogens.cl
-mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
-#ifdef USE_MIXED_PRECISION
-    real4 pos1 = posq[index];
-    real4 pos2 = posqCorrection[index];
-    return (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-#else
-    return posq[index];
-#endif
-}
-/**
- * Enforce constraints on SHAKE clusters
- */
-__kernel void applyShakeToHydrogens(int numClusters, mixed tol, __global const real4* restrict oldPos, __global const real4* restrict posCorrection, __global mixed4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
-    int index = get_global_id(0);
-    while (index < numClusters) {
-        // Load the data for this cluster.
-        int4 atoms = clusterAtoms[index];
-        float4 params = clusterParams[index];
-        mixed4 pos = loadPos(oldPos, posCorrection, atoms.x);
-        mixed4 xpi = posDelta[atoms.x];
-        mixed4 pos1 = loadPos(oldPos, posCorrection, atoms.y);
-        mixed4 xpj1 = posDelta[atoms.y];
-        mixed4 pos2 = {0.0f, 0.0f, 0.0f, 0.0f};
-        mixed4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f};
-        float invMassCentral = params.x;
-        float avgMass = params.y;
-        float d2 = params.z;
-        float invMassPeripheral = params.w;
-        if (atoms.z != -1) {
-            pos2 = loadPos(oldPos, posCorrection, atoms.z);
-            xpj2 = posDelta[atoms.z];
-        }
-        mixed4 pos3 = {0.0f, 0.0f, 0.0f, 0.0f};
-        mixed4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f};
-        if (atoms.w != -1) {
-            pos3 = loadPos(oldPos, posCorrection, atoms.w);
-            xpj3 = posDelta[atoms.w];
-        }
-        // Precompute quantities.
-        mixed4 rij1 = pos-pos1;
-        mixed4 rij2 = pos-pos2;
-        mixed4 rij3 = pos-pos3;
-        mixed rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z;
-        mixed rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z;
-        mixed rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z;
-        mixed ld1 = d2-rij1sq;
-        mixed ld2 = d2-rij2sq;
-        mixed ld3 = d2-rij3sq;
-        // Iterate until convergence.
-        int converged = false;
-        int iteration = 0;
-        while (iteration < 15 && !converged) {
-            converged = true;
-#ifdef CONSTRAIN_VELOCITIES
-            mixed4 rpij = xpi-xpj1;
-            mixed rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
-            mixed delta = -2.0f*avgMass*rrpr/rij1sq;
-            mixed4 dr = rij1*delta;
-            xpi.xyz += dr.xyz*invMassCentral;
-            xpj1.xyz -= dr.xyz*invMassPeripheral;
-            if (fabs(delta) > tol)
-                converged = false;
-            if (atoms.z != -1) {
-                rpij = xpi-xpj2;
-                rrpr = rpij.x*rij2.x + rpij.y*rij2.y + rpij.z*rij2.z;
-                delta = -2.0f*avgMass*rrpr/rij2sq;
-                dr = rij2*delta;
-                xpi.xyz += dr.xyz*invMassCentral;
-                xpj2.xyz -= dr.xyz*invMassPeripheral;
-                if (fabs(delta) > tol)
-                    converged = false;
-            }
-            if (atoms.w != -1) {
-                rpij = xpi-xpj3;
-                rrpr = rpij.x*rij3.x + rpij.y*rij3.y + rpij.z*rij3.z;
-                delta = -2.0f*avgMass*rrpr/rij3sq;
-                dr = rij3*delta;
-                xpi.xyz += dr.xyz*invMassCentral;
-                xpj3.xyz -= dr.xyz*invMassPeripheral;
-                if (fabs(delta) > tol)
-                    converged = false;
-            }
-#else
-            mixed4 rpij = xpi-xpj1;
-            mixed rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
-            mixed rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
-            mixed diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol);
-            if (diff >= 1.0f) {
-                mixed acor  = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq);
-                mixed4 dr = rij1*acor;
-                xpi.xyz += dr.xyz*invMassCentral;
-                xpj1.xyz -= dr.xyz*invMassPeripheral;
-                converged = false;
-            }
-            if (atoms.z != -1) {
-                rpij.xyz = xpi.xyz-xpj2.xyz;
-                rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
-                rrpr = rij2.x*rpij.x + rij2.y*rpij.y + rij2.z*rpij.z;
-                diff = fabs(ld2-2.0f*rrpr-rpsqij) / (d2*tol);
-                if (diff >= 1.0f) {
-                    mixed acor  = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq);
-                    mixed4 dr = rij2*acor;
-                    xpi.xyz += dr.xyz*invMassCentral;
-                    xpj2.xyz -= dr.xyz*invMassPeripheral;
-                    converged = false;
-                }
-            }
-            if (atoms.w != -1) {
-                rpij.xyz = xpi.xyz-xpj3.xyz;
-                rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
-                rrpr = rij3.x*rpij.x + rij3.y*rpij.y + rij3.z*rpij.z;
-                diff = fabs(ld3 - 2.0f*rrpr - rpsqij) / (d2*tol);
-                if (diff >= 1.0f) {
-                    mixed acor  = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq);
-                    mixed4 dr = rij3*acor;
-                    xpi.xyz += dr.xyz*invMassCentral;
-                    xpj3.xyz -= dr.xyz*invMassPeripheral;
-                    converged = false;
-                }
-            }
-#endif
-            iteration++;
-        }
-        // Record the new positions.
-        posDelta[atoms.x] = xpi;
-        posDelta[atoms.y] = xpj1;
-        if (atoms.z != -1)
-            posDelta[atoms.z] = xpj2;
-        if (atoms.w != -1)
-            posDelta[atoms.w] = xpj3;
-        index += get_global_size(0);
-    }
-}
--- a/platforms/opencl/src/kernels/utilities.cl
+++ b/platforms/opencl/src/kernels/utilities.cl
@@ -81,21 +81,26 @@ __kernel void reduceReal4Buffer(__global real4* restrict buffer, int bufferSize,
    }
 }
-#ifdef SUPPORTS_64_BIT_ATOMICS
 /**
 * Sum the various buffers containing forces.
 */
-__kernel void reduceForces(__global const long* restrict longBuffer, __global real4* restrict buffer, int bufferSize, int numBuffers) {
+__kernel void reduceForces(__global long* restrict longBuffer, __global real4* restrict buffer, int bufferSize, int numBuffers) {
    int totalSize = bufferSize*numBuffers;
    real scale = 1/(real) 0x100000000;
    for (int index = get_global_id(0); index < bufferSize; index += get_global_size(0)) {
+#ifdef SUPPORTS_64_BIT_ATOMICS
        real4 sum = (real4) (scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0);
+#else
+        real4 sum = (real4) 0;
+#endif
        for (int i = index; i < totalSize; i += bufferSize)
            sum += buffer[i];
        buffer[index] = sum;
+        longBuffer[index] = (long) (sum.x*0x100000000);
+        longBuffer[index+bufferSize] = (long) (sum.y*0x100000000);
+        longBuffer[index+2*bufferSize] = (long) (sum.z*0x100000000);
    }
 }
-#endif
 /**
 * Sum the energy buffer.

--- a/platforms/opencl/src/kernels/velocityVerlet.cl
+++ b/platforms/opencl/src/kernels/velocityVerlet.cl
+/**
+ * Perform the first step of Velocity Verlet integration.
+ */
+__kernel void integrateVelocityVerletPart1(int numAtoms, int numPairs, int paddedNumAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq,
+        __global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta,
+        __global const int* restrict atomList, __global const int2* restrict pairList) {
+    const mixed2 stepSize = dt[0];
+    const mixed dtPos = stepSize.y;
+    const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
+    int index = get_global_id(0);
+    while (index < numAtoms) {
+        int atom = atomList[index];
+        mixed4 velocity = velm[atom];
+        if (velocity.w != 0.0) {
+#ifdef USE_MIXED_PRECISION
+            real4 pos1 = posq[atom];
+            real4 pos2 = posqCorrection[atom];
+            mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
+#else
+            real4 pos = posq[atom];
+#endif
+            velocity.x += 0.5f * dtVel*force[atom].x*velocity.w;
+            velocity.y += 0.5f * dtVel*force[atom].y*velocity.w;
+            velocity.z += 0.5f * dtVel*force[atom].z*velocity.w;
+            pos.x = velocity.x*dtPos;
+            pos.y = velocity.y*dtPos;
+            pos.z = velocity.z*dtPos;
+            posDelta[atom] = pos;
+            velm[atom] = velocity;
+        }
+        index += get_global_size(0);
+    }
+    index = get_global_id(0);
+    while (index < numPairs){
+        int atom1 = pairList[index].x;
+        int atom2 = pairList[index].y;
+        mixed4 v1 = velm[atom1];
+        mixed4 v2 = velm[atom2];
+        mixed m1 = v1.w == 0.0f ? 0.0f : 1.0f / v1.w;
+        mixed m2 = v2.w == 0.0f ? 0.0f : 1.0f / v2.w;
+        mixed mass1fract = m1 / (m1 + m2);
+        mixed mass2fract = m2 / (m1 + m2);
+        mixed invRedMass = (m1 * m2 != 0.0f) ? (m1 + m2)/(m1 * m2) : 0.0f;
+        mixed invTotMass = (m1 + m2 != 0.0f) ? 1.0f /(m1 + m2) : 0.0f;
+        mixed3 comVel;
+        comVel.x= v1.x*mass1fract + v2.x*mass2fract;
+        comVel.y= v1.y*mass1fract + v2.y*mass2fract;
+        comVel.z= v1.z*mass1fract + v2.z*mass2fract;
+        mixed3 relVel;
+        relVel.x= v2.x - v1.x;
+        relVel.y= v2.y - v1.y;
+        relVel.z= v2.z - v1.z;
+        mixed3 comFrc;
+        comFrc.x = force[atom1].x + force[atom2].x;
+        comFrc.y = force[atom1].y + force[atom2].y;
+        comFrc.z = force[atom1].z + force[atom2].z;
+        mixed3 relFrc;
+        relFrc.x = mass1fract*force[atom2].x - mass2fract*force[atom1].x;
+        relFrc.y = mass1fract*force[atom2].y - mass2fract*force[atom1].y;
+        relFrc.z = mass1fract*force[atom2].z - mass2fract*force[atom1].z;
+        comVel.x += comFrc.x * 0.5f * dtVel * invTotMass;
+        comVel.y += comFrc.y * 0.5f * dtVel * invTotMass;
+        comVel.z += comFrc.z * 0.5f * dtVel * invTotMass;
+        relVel.x += relFrc.x * 0.5f * dtVel * invRedMass;
+        relVel.y += relFrc.y * 0.5f * dtVel * invRedMass;
+        relVel.z += relFrc.z * 0.5f * dtVel * invRedMass;
+#ifdef USE_MIXED_PRECISION
+        real4 posv1 = posq[atom1];
+        real4 posv2 = posq[atom2];
+        real4 posc1 = posqCorrection[atom1];
+        real4 posc2 = posqCorrection[atom2];
+        mixed4 pos1 = (mixed4) (posv1.x+(mixed)posc1.x, posv1.y+(mixed)posc1.y, posv1.z+(mixed)posc1.z, posv1.w);
+        mixed4 pos2 = (mixed4) (posv2.x+(mixed)posc2.x, posv2.y+(mixed)posc2.y, posv2.z+(mixed)posc2.z, posv2.w);
+#else
+        real4 pos1 = posq[atom1];
+        real4 pos2 = posq[atom2];
+#endif
+        if (v1.w != 0.0f) {
+            v1.x = comVel.x - relVel.x*mass2fract;
+            v1.y = comVel.y - relVel.y*mass2fract;
+            v1.z = comVel.z - relVel.z*mass2fract;
+            pos1.x = v1.x*dtPos;
+            pos1.y = v1.y*dtPos;
+            pos1.z = v1.z*dtPos;
+            posDelta[atom1] = pos1;
+            velm[atom1] = v1;
+        }
+        if (v2.w != 0.0f) {
+            v2.x = comVel.x + relVel.x*mass1fract;
+            v2.y = comVel.y + relVel.y*mass1fract;
+            v2.z = comVel.z + relVel.z*mass1fract;
+            pos2.x = v2.x*dtPos;
+            pos2.y = v2.y*dtPos;
+            pos2.z = v2.z*dtPos;
+            posDelta[atom2] = pos2;
+            velm[atom2] = v2;
+        }
+        index += get_global_size(0);
+     }
+}
+/**
+ * Perform the second step of Velocity Verlet integration.
+ */
+__kernel void integrateVelocityVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq,
+        __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
+    mixed2 stepSize = dt[0];
+    int index = get_global_id(0);
+    if (index == 0)
+        dt[0].x = stepSize.y;
+    while(index < numAtoms) {
+        mixed4 velocity = velm[index];
+        if (velocity.w != 0.0) {
+#ifdef USE_MIXED_PRECISION
+            real4 pos1 = posq[index];
+            real4 pos2 = posqCorrection[index];
+            mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
+#else
+            real4 pos = posq[index];
+#endif
+            mixed4 delta = posDelta[index];
+            pos.xyz += delta.xyz;
+#ifdef USE_MIXED_PRECISION
+            posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
+            posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
+#else
+            posq[index] = pos;
+#endif
+        }
+        index += get_global_size(0);
+    }
+}
+/**
+ * Perform the third step of Velocity Verlet integration.
+ */
+__kernel void integrateVelocityVerletPart3(int numAtoms, int numPairs, int paddedNumAtoms, __global mixed2* restrict dt, __global real4* restrict posq,
+        __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global const mixed4* restrict posDelta,
+        __global const int* restrict atomList, __global const int2* __restrict__ pairList) {
+    mixed2 stepSize = dt[0];
+#ifndef SUPPORTS_DOUBLE_PRECISION
+    double oneOverDt = 1.0/stepSize.y;
+#else
+    float oneOverDt = 1.0f/stepSize.y;
+    float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
+#endif
+    const mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
+    int index = get_global_id(0);
+    if (index == 0)
+        dt[0].x = stepSize.y;
+    while(index < numAtoms) {
+        int atom = atomList[index];
+        mixed4 velocity = velm[atom];
+        if (velocity.w != 0.0) {
+            mixed4 deltaXconstrained = posDelta[atom];
+            velocity.x += 0.5f * dtVel*force[atom].x*velocity.w + (deltaXconstrained.x - velocity.x*stepSize.y)*oneOverDt;
+            velocity.y += 0.5f * dtVel*force[atom].y*velocity.w + (deltaXconstrained.y - velocity.y*stepSize.y)*oneOverDt;
+            velocity.z += 0.5f * dtVel*force[atom].z*velocity.w + (deltaXconstrained.z - velocity.z*stepSize.y)*oneOverDt;
+#ifdef SUPPORTS_DOUBLE_PRECISION
+            velocity.x += (deltaXconstrained.x - velocity.x*stepSize.y)*correction;
+            velocity.y += (deltaXconstrained.y - velocity.y*stepSize.y)*correction;
+            velocity.z += (deltaXconstrained.z - velocity.z*stepSize.y)*correction;
+#endif
+            velm[atom] = velocity;
+        }
+        index += get_global_size(0);
+    }
+    index = get_global_id(0);
+    while(index < numPairs) {
+        int atom1 = pairList[index].x;
+        int atom2 = pairList[index].y;
+        mixed4 v1 = velm[atom1];
+        mixed4 v2 = velm[atom2];
+        mixed m1 = v1.w == 0.0f ? 0.0f : 1.0f / v1.w;
+        mixed m2 = v2.w == 0.0f ? 0.0f : 1.0f / v2.w;
+        mixed mass1fract = m1 / (m1 + m2);
+        mixed mass2fract = m2 / (m1 + m2);
+        mixed invRedMass = (m1 * m2 != 0.0f) ? (m1 + m2)/(m1 * m2) : 0.0f;
+        mixed invTotMass = (m1 + m2 != 0.0f) ? 1.0f /(m1 + m2) : 0.0f;
+        mixed3 comVel;
+        comVel.x= v1.x*mass1fract + v2.x*mass2fract;
+        comVel.y= v1.y*mass1fract + v2.y*mass2fract;
+        comVel.z= v1.z*mass1fract + v2.z*mass2fract;
+        mixed3 relVel;
+        relVel.x= v2.x - v1.x;
+        relVel.y= v2.y - v1.y;
+        relVel.z= v2.z - v1.z;
+        mixed3 comFrc;
+        comFrc.x = force[atom1].x + force[atom2].x;
+        comFrc.y = force[atom1].y + force[atom2].y;
+        comFrc.z = force[atom1].z + force[atom2].z;
+        mixed3 relFrc;
+        relFrc.x = mass1fract*force[atom2].x - mass2fract*force[atom1].x;
+        relFrc.y = mass1fract*force[atom2].y - mass2fract*force[atom1].y;
+        relFrc.z = mass1fract*force[atom2].z - mass2fract*force[atom1].z;
+        comVel.x += comFrc.x * 0.5f * dtVel * invTotMass;
+        comVel.y += comFrc.y * 0.5f * dtVel * invTotMass;
+        comVel.z += comFrc.z * 0.5f * dtVel * invTotMass;
+        relVel.x += relFrc.x * 0.5f * dtVel * invRedMass;
+        relVel.y += relFrc.y * 0.5f * dtVel * invRedMass;
+        relVel.z += relFrc.z * 0.5f * dtVel * invRedMass;
+        if (v1.w != 0.0f) {
+            mixed4 deltaXconstrained = posDelta[atom1];
+            v1.x = comVel.x - relVel.x*mass2fract + (deltaXconstrained.x - v1.x*stepSize.y)*oneOverDt;
+            v1.y = comVel.y - relVel.y*mass2fract + (deltaXconstrained.y - v1.y*stepSize.y)*oneOverDt;
+            v1.z = comVel.z - relVel.z*mass2fract + (deltaXconstrained.z - v1.z*stepSize.y)*oneOverDt;
+#ifdef SUPPORTS_DOUBLE_PRECISION
+            v1.x += (deltaXconstrained.x - v1.x*stepSize.y)*correction;
+            v1.y += (deltaXconstrained.y - v1.y*stepSize.y)*correction;
+            v1.z += (deltaXconstrained.z - v1.z*stepSize.y)*correction;
+#endif
+            velm[atom1] = v1;
+        }
+        if (v2.w != 0.0f) {
+            mixed4 deltaXconstrained = posDelta[atom2];
+            v2.x = comVel.x + relVel.x*mass1fract + (deltaXconstrained.x - v2.x*stepSize.y)*oneOverDt;
+            v2.y = comVel.y + relVel.y*mass1fract + (deltaXconstrained.y - v2.y*stepSize.y)*oneOverDt;
+            v2.z = comVel.z + relVel.z*mass1fract + (deltaXconstrained.z - v2.z*stepSize.y)*oneOverDt;
+#ifdef SUPPORTS_DOUBLE_PRECISION
+            v2.x += (deltaXconstrained.x - v2.x*stepSize.y)*correction;
+            v2.y += (deltaXconstrained.y - v2.y*stepSize.y)*correction;
+            v2.z += (deltaXconstrained.z - v2.z*stepSize.y)*correction;
+#endif
+            velm[atom2] = v2;
+        }
+        index += get_global_size(0);
+    }
+}
+__kernel void integrateVelocityVerletHardWall(int numPairs, __global const float* restrict maxPairDistance, 
+        __global mixed2* restrict dt, __global real4* restrict posq,
+        __global real4* restrict posqCorrection, __global mixed4* restrict velm, 
+        __global const int2* restrict pairList, __global const float* __restrict__ pairTemperature) {
+    mixed dtPos = dt[0].y;
+    mixed maxDelta = (mixed) maxPairDistance[0];
+    if (maxDelta > 0){
+        int index = get_global_id(0);
+        while(index < numPairs) {
+            const mixed hardWallScale = sqrt( ((mixed) pairTemperature[index]) * ((mixed) BOLTZ));
+            int2 atom = (int2) (pairList[index].x, pairList[index].y);
+#ifdef USE_MIXED_PRECISION
+            real4 posv1 = posq[atom.x];
+            real4 posc1 = posqCorrection[atom.x];
+            mixed4 pos1 = (mixed4) (posv1.x+(mixed)posc1.x, posv1.y+(mixed)posc1.y, posv1.z+(mixed)posc1.z, posv1.w);
+            real4 posv2 = posq[atom.y];
+            real4 posc2 = posqCorrection[atom.y];
+            mixed4 pos2 = (mixed4) (posv2.x+(mixed)posc2.x, posv2.y+(mixed)posc2.y, posv2.z+(mixed)posc2.z, posv2.w);
+#else
+            real4 pos1 = posq[atom.x];
+            real4 pos2 = posq[atom.y];
+#endif
+            mixed3 delta = (mixed3) (
+                (mixed) (pos1.x - pos2.x),
+                (mixed) (pos1.y - pos2.y),
+                (mixed) (pos1.z - pos2.z)
+            );
+            mixed r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
+            mixed rInv = 1/r;
+            if (rInv*maxDelta < 1.0) {
+                // The constraint has been violated, so make the inter-particle distance "bounce"
+                // off the hard wall.
+                mixed3 bondDir = (mixed3) (delta.x * rInv, delta.y * rInv, delta.z * rInv);
+                mixed3 vel1 = (mixed3) (velm[atom.x].x, velm[atom.x].y, velm[atom.x].z);
+                mixed3 vel2 = (mixed3) (velm[atom.y].x, velm[atom.y].y, velm[atom.y].z);
+                mixed m1 = velm[atom.x].w != 0.0 ? 1.0/velm[atom.x].w : 0.0;
+                mixed m2 = velm[atom.y].w != 0.0 ? 1.0/velm[atom.y].w : 0.0;
+                mixed invTotMass = (m1 + m2 != 0.0) ? 1.0 /(m1 + m2) : 0.0;
+                mixed deltaR = r-maxDelta;
+                mixed deltaT = dtPos;
+                mixed dt = dtPos;
+                mixed dotvr1 = vel1.x*bondDir.x + vel1.y*bondDir.y + vel1.z*bondDir.z;
+                mixed3 vb1 = (mixed3) (bondDir.x*dotvr1, bondDir.y*dotvr1, bondDir.z*dotvr1);
+                mixed3 vp1 = (mixed3) (vel1.x-vb1.x, vel1.y-vb1.y, vel1.z-vb1.z);
+                if (m2 == 0) {
+                    // The parent particle is massless, so move only the Drude particle.
+                    if (dotvr1 != 0.0)
+                        deltaT = deltaR/fabs(dotvr1);
+                    if (deltaT > dtPos)
+                        deltaT = dtPos;
+                    dotvr1 = -dotvr1*hardWallScale/(fabs(dotvr1)*sqrt(m1));
+                    mixed dr = -deltaR + deltaT*dotvr1;
+                    pos1.x += bondDir.x*dr;
+                    pos1.y += bondDir.y*dr;
+                    pos1.z += bondDir.z*dr;
+                    velm[atom.x] = (mixed4) (vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom.x].w);
+#ifdef USE_MIXED_PRECISION
+                    posq[atom.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
+                    posqCorrection[atom.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
+#else
+                    posq[atom.x] = pos1;
+#endif
+                }
+                else {
+                    // Move both particles.
+                    mixed dotvr2 = vel2.x*bondDir.x + vel2.y*bondDir.y + vel2.z*bondDir.z;
+                    mixed3 vb2 = (mixed3) (bondDir.x*dotvr2, bondDir.y*dotvr2, bondDir.z*dotvr2);
+                    mixed3 vp2 = (mixed3) (vel2.x-vb2.x, vel2.y-vb2.y, vel2.z-vb2.z);
+                    mixed vbCMass = (m1*dotvr1 + m2*dotvr2)*invTotMass;
+                    dotvr1 -= vbCMass;
+                    dotvr2 -= vbCMass;
+                    if (dotvr1 != dotvr2)
+                        deltaT = deltaR/fabs(dotvr1-dotvr2);
+                    if (deltaT > dt)
+                        deltaT = dt;
+                    mixed vBond = hardWallScale/sqrt(m1);
+                    dotvr1 = -dotvr1*vBond*m2*invTotMass/fabs(dotvr1);
+                    dotvr2 = -dotvr2*vBond*m1*invTotMass/fabs(dotvr2);
+                    mixed dr1 = -deltaR*m2*invTotMass + deltaT*dotvr1;
+                    mixed dr2 = deltaR*m1*invTotMass + deltaT*dotvr2;
+                    dotvr1 += vbCMass;
+                    dotvr2 += vbCMass;
+                    pos1.x += bondDir.x*dr1;
+                    pos1.y += bondDir.y*dr1;
+                    pos1.z += bondDir.z*dr1;
+                    pos2.x += bondDir.x*dr2;
+                    pos2.y += bondDir.y*dr2;
+                    pos2.z += bondDir.z*dr2;
+                    velm[atom.x] = (mixed4) (vp1.x + bondDir.x*dotvr1, vp1.y + bondDir.y*dotvr1, vp1.z + bondDir.z*dotvr1, velm[atom.x].w);
+                    velm[atom.y] = (mixed4) (vp2.x + bondDir.x*dotvr2, vp2.y + bondDir.y*dotvr2, vp2.z + bondDir.z*dotvr2, velm[atom.y].w);
+#ifdef USE_MIXED_PRECISION
+                    posq[atom.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
+                    posq[atom.y] = (real4) ((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w);
+                    posqCorrection[atom.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
+                    posqCorrection[atom.y] = (real4) (pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0);
+#else
+                    posq[atom.x] = pos1;
+                    posq[atom.y] = pos2;
+#endif
+                }
+            }
+            index += get_global_size(0);
+        }
+    }
+}
--- a/platforms/opencl/src/kernels/verlet.cl
+++ b/platforms/opencl/src/kernels/verlet.cl
-/**
- * Perform the first step of verlet integration.
- */
-__kernel void integrateVerletPart1(int numAtoms, __global const mixed2* restrict dt, __global const real4* restrict posq, __global const real4* restrict posqCorrection, __global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta) {
-    mixed2 stepSize = dt[0];
-    mixed dtPos = stepSize.y;
-    mixed dtVel = 0.5f*(stepSize.x+stepSize.y);
-    int index = get_global_id(0);
-    while (index < numAtoms) {
-        mixed4 velocity = velm[index];
-        if (velocity.w != 0.0) {
-#ifdef USE_MIXED_PRECISION
-            real4 pos1 = posq[index];
-            real4 pos2 = posqCorrection[index];
-            mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-#else
-            real4 pos = posq[index];
-#endif
-            velocity.x += force[index].x*dtVel*velocity.w;
-            velocity.y += force[index].y*dtVel*velocity.w;
-            velocity.z += force[index].z*dtVel*velocity.w;
-            pos.xyz = velocity.xyz*dtPos;
-            posDelta[index] = pos;
-            velm[index] = velocity;
-        }
-        index += get_global_size(0);
-    }
-}
-/**
- * Perform the second step of verlet integration.
- */
-__kernel void integrateVerletPart2(int numAtoms, __global mixed2* restrict dt, __global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm, __global const mixed4* restrict posDelta) {
-    mixed2 stepSize = dt[0];
-#ifdef SUPPORTS_DOUBLE_PRECISION
-    double oneOverDt = 1.0/stepSize.y;
-#else
-    float oneOverDt = 1.0f/stepSize.y;
-    float correction = (1.0f-oneOverDt*stepSize.y)/stepSize.y;
-#endif
-    if (get_global_id(0) == 0)
-        dt[0].x = stepSize.y;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    int index = get_global_id(0);
-    while (index < numAtoms) {
-        mixed4 velocity = velm[index];
-        if (velocity.w != 0.0) {
-#ifdef USE_MIXED_PRECISION
-            real4 pos1 = posq[index];
-            real4 pos2 = posqCorrection[index];
-            mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-#else
-            real4 pos = posq[index];
-#endif
-            mixed4 delta = posDelta[index];
-            pos.xyz += delta.xyz;
-#ifdef SUPPORTS_DOUBLE_PRECISION
-            velocity.xyz = convert_mixed4(convert_double4(delta)*oneOverDt).xyz;
-#else
-            velocity.xyz = delta.xyz*oneOverDt + delta.xyz*correction;
-#endif
-#ifdef USE_MIXED_PRECISION
-            posq[index] = convert_real4(pos);
-            posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
-#else
-            posq[index] = pos;
-#endif
-            velm[index] = velocity;
-        }
-        index += get_global_size(0);
-    }
-}
-/**
- * Select the step size to use for the next step.
- */
-__kernel void selectVerletStepSize(int numAtoms, mixed maxStepSize, mixed errorTol, __global mixed2* restrict dt, __global const mixed4* restrict velm, __global const real4* restrict force, __local mixed* restrict error) {
-    // Calculate the error.
-    mixed err = 0;
-    int index = get_local_id(0);
-    while (index < numAtoms) {
-        real4 f = force[index];
-        mixed invMass = velm[index].w;
-        err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass*invMass;
-        index += get_global_size(0);
-    }
-    error[get_local_id(0)] = err;
-    barrier(CLK_LOCAL_MEM_FENCE);
-    // Sum the errors from all threads.
-    for (unsigned int offset = 1; offset < get_local_size(0); offset *= 2) {
-        if (get_local_id(0)+offset < get_local_size(0) && (get_local_id(0)&(2*offset-1)) == 0)
-            error[get_local_id(0)] += error[get_local_id(0)+offset];
-        barrier(CLK_LOCAL_MEM_FENCE);
-    }
-    if (get_local_id(0) == 0) {
-        mixed totalError = sqrt(error[0]/(numAtoms*3));
-        mixed newStepSize = sqrt(errorTol/totalError);
-        mixed oldStepSize = dt[0].y;
-        if (oldStepSize > 0.0f)
-            newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
-        if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
-            newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
-        if (newStepSize > maxStepSize)
-            newStepSize = maxStepSize;
-        dt[0].y = newStepSize;
-    }
-}
--- a/platforms/opencl/src/kernels/virtualSites.cl
+++ b/platforms/opencl/src/kernels/virtualSites.cl
-/**
- * Load the position of a particle.
- */
-mixed4 loadPos(__global const real4* restrict posq, __global const real4* restrict posqCorrection, int index) {
-#ifdef USE_MIXED_PRECISION
-    real4 pos1 = posq[index];
-    real4 pos2 = posqCorrection[index];
-    return (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-#else
-    return posq[index];
-#endif
-}
-/**
- * Store the position of a particle.
- */
-void storePos(__global real4* restrict posq, __global real4* restrict posqCorrection, int index, mixed4 pos) {
-#ifdef USE_MIXED_PRECISION
-    posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
-    posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
-#else
-    posq[index] = pos;
-#endif
-}
-/**
- * Compute the positions of virtual sites
- */
-__kernel void computeVirtualSites(__global real4* restrict posq,
-#ifdef USE_MIXED_PRECISION
-        __global real4* restrict posqCorrection,
-#endif
-        __global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
-        __global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
-        __global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights,
-        __global const int* restrict localCoordsIndex, __global const int* restrict localCoordsAtoms,
-        __global const real* restrict localCoordsWeights, __global const real4* restrict localCoordsPos,
-        __global const int* restrict localCoordsStartIndex) {
-#ifndef USE_MIXED_PRECISION
-        __global real4* posqCorrection = 0;
-#endif
-    // Two particle average sites.
-    for (int index = get_global_id(0); index < NUM_2_AVERAGE; index += get_global_size(0)) {
-        int4 atoms = avg2Atoms[index];
-        real2 weights = avg2Weights[index];
-        mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
-        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
-        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
-        pos.xyz = pos1.xyz*weights.x + pos2.xyz*weights.y;
-        storePos(posq, posqCorrection, atoms.x, pos);
-    }
-    // Three particle average sites.
-    for (int index = get_global_id(0); index < NUM_3_AVERAGE; index += get_global_size(0)) {
-        int4 atoms = avg3Atoms[index];
-        real4 weights = avg3Weights[index];
-        mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
-        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
-        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
-        mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
-        pos.xyz = pos1.xyz*weights.x + pos2.xyz*weights.y + pos3.xyz*weights.z;
-        storePos(posq, posqCorrection, atoms.x, pos);
-    }
-    // Out of plane sites.
-    for (int index = get_global_id(0); index < NUM_OUT_OF_PLANE; index += get_global_size(0)) {
-        int4 atoms = outOfPlaneAtoms[index];
-        real4 weights = outOfPlaneWeights[index];
-        mixed4 pos = loadPos(posq, posqCorrection, atoms.x);
-        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
-        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
-        mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
-        mixed4 v12 = pos2-pos1;
-        mixed4 v13 = pos3-pos1;
-        pos.xyz = pos1.xyz + v12.xyz*weights.x + v13.xyz*weights.y + cross(v12, v13).xyz*weights.z;
-        storePos(posq, posqCorrection, atoms.x, pos);
-    }
-    // Local coordinates sites.
-    for (int index = get_global_id(0); index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
-        int siteAtomIndex = localCoordsIndex[index];
-        int start = localCoordsStartIndex[index];
-        int end = localCoordsStartIndex[index+1];
-        mixed3 origin = 0, xdir = 0, ydir = 0;
-        for (int j = start; j < end; j++) {
-            mixed3 pos = loadPos(posq, posqCorrection, localCoordsAtoms[j]).xyz;
-            origin += pos*localCoordsWeights[3*j];
-            xdir += pos*localCoordsWeights[3*j+1];
-            ydir += pos*localCoordsWeights[3*j+2];
-        }
-        mixed3 zdir = cross(xdir, ydir);
-        mixed normXdir = sqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
-        mixed normZdir = sqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
-        mixed invNormXdir = (normXdir > 0 ? 1/normXdir : 0);
-        mixed invNormZdir = (normZdir > 0 ? 1/normZdir : 0);
-        xdir *= invNormXdir;
-        zdir *= invNormZdir;
-        ydir = cross(zdir, xdir);
-        mixed3 localPosition = convert_mixed4(localCoordsPos[index]).xyz;
-        mixed4 pos = loadPos(posq, posqCorrection, siteAtomIndex);
-        pos.x = origin.x + xdir.x*localPosition.x + ydir.x*localPosition.y + zdir.x*localPosition.z;
-        pos.y = origin.y + xdir.y*localPosition.x + ydir.y*localPosition.y + zdir.y*localPosition.z;
-        pos.z = origin.z + xdir.z*localPosition.x + ydir.z*localPosition.y + zdir.z*localPosition.z;
-        storePos(posq, posqCorrection, siteAtomIndex, pos);
-    }
-}
-#ifdef HAS_OVERLAPPING_VSITES
-    #ifdef SUPPORTS_64_BIT_ATOMICS
-        // We will use 64 bit atomics for force redistribution.
-        #define ADD_FORCE(index, f) addForce(index, f, longForce);
-        #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
-        void addForce(int index, real4 f,  __global long* longForce) {
-            atom_add(&longForce[index], (long) (f.x*0x100000000));
-            atom_add(&longForce[index+PADDED_NUM_ATOMS], (long) (f.y*0x100000000));
-            atom_add(&longForce[index+2*PADDED_NUM_ATOMS], (long) (f.z*0x100000000));
-        }
-        __kernel void addDistributedForces(__global const long* restrict longForces, __global real4* restrict forces) {
-            real scale = 1/(real) 0x100000000;
-            for (int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
-                real4 f = (real4) (scale*longForces[index], scale*longForces[index+PADDED_NUM_ATOMS], scale*longForces[index+2*PADDED_NUM_ATOMS], 0);
-                forces[index] += f;
-            }
-        }
-    #else
-        // 64 bit atomics aren't supported, so we have to use atomic_cmpxchg() for force redistribution.
-        #define ADD_FORCE(index, f) addForce(index, f, force);
-        void atomicAddFloat(__global float* p, float v) {
-            __global int* ip = (__global int*) p;
-            while (true) {
-                union {
-                    float f;
-                    int i;
-                } oldval, newval;
-                oldval.f = *p;
-                newval.f = oldval.f+v;
-                if (atomic_cmpxchg(ip, oldval.i, newval.i) == oldval.i)
-                    return;
-            }
-        }
-        void addForce(int index, float4 f, __global float4* force) {
-            __global float* components = (__global float*) force;
-            atomicAddFloat(&components[4*index], f.x);
-            atomicAddFloat(&components[4*index+1], f.y);
-            atomicAddFloat(&components[4*index+2], f.z);
-        }
-    #endif
-#else
-    // There are no overlapping virtual sites, so we can just store forces directly.
-    #define ADD_FORCE(index, f) force[index].xyz += (f).xyz;
-#endif
-/**
- * Distribute forces from virtual sites to the atoms they are based on.
- */
-__kernel void distributeForces(__global const real4* restrict posq, __global real4* restrict force,
-#ifdef SUPPORTS_64_BIT_ATOMICS
-        __global long* restrict longForce,
-#endif
-#ifdef USE_MIXED_PRECISION
-        __global real4* restrict posqCorrection,
-#endif
-        __global const int4* restrict avg2Atoms, __global const real2* restrict avg2Weights,
-        __global const int4* restrict avg3Atoms, __global const real4* restrict avg3Weights,
-        __global const int4* restrict outOfPlaneAtoms, __global const real4* restrict outOfPlaneWeights,
-        __global const int* restrict localCoordsIndex, __global const int* restrict localCoordsAtoms,
-        __global const real* restrict localCoordsWeights, __global const real4* restrict localCoordsPos,
-        __global const int* restrict localCoordsStartIndex) {
-#ifndef USE_MIXED_PRECISION
-        __global real4* posqCorrection = 0;
-#endif
-    // Two particle average sites.
-    for (int index = get_global_id(0); index < NUM_2_AVERAGE; index += get_global_size(0)) {
-        int4 atoms = avg2Atoms[index];
-        real2 weights = avg2Weights[index];
-        real4 f = force[atoms.x];
-        ADD_FORCE(atoms.y, f*weights.x);
-        ADD_FORCE(atoms.z, f*weights.y);
-    }
-    // Three particle average sites.
-    for (int index = get_global_id(0); index < NUM_3_AVERAGE; index += get_global_size(0)) {
-        int4 atoms = avg3Atoms[index];
-        real4 weights = avg3Weights[index];
-        real4 f = force[atoms.x];
-        ADD_FORCE(atoms.y, f*weights.x);
-        ADD_FORCE(atoms.z, f*weights.y);
-        ADD_FORCE(atoms.w, f*weights.z);
-    }
-    // Out of plane sites.
-    for (int index = get_global_id(0); index < NUM_OUT_OF_PLANE; index += get_global_size(0)) {
-        int4 atoms = outOfPlaneAtoms[index];
-        real4 weights = outOfPlaneWeights[index];
-        mixed4 pos1 = loadPos(posq, posqCorrection, atoms.y);
-        mixed4 pos2 = loadPos(posq, posqCorrection, atoms.z);
-        mixed4 pos3 = loadPos(posq, posqCorrection, atoms.w);
-        mixed4 v12 = pos2-pos1;
-        mixed4 v13 = pos3-pos1;
-        real4 f = force[atoms.x];
-        real4 fp2 = (real4) (weights.x*f.x - weights.z*v13.z*f.y + weights.z*v13.y*f.z,
-                   weights.z*v13.z*f.x + weights.x*f.y - weights.z*v13.x*f.z,
-                  -weights.z*v13.y*f.x + weights.z*v13.x*f.y + weights.x*f.z, 0.0f);
-        real4 fp3 = (real4) (weights.y*f.x + weights.z*v12.z*f.y - weights.z*v12.y*f.z,
-                  -weights.z*v12.z*f.x + weights.y*f.y + weights.z*v12.x*f.z,
-                   weights.z*v12.y*f.x - weights.z*v12.x*f.y + weights.y*f.z, 0.0f);
-        ADD_FORCE(atoms.y, f-fp2-fp3);
-        ADD_FORCE(atoms.z, fp2);
-        ADD_FORCE(atoms.w, fp3);
-    }
-    // Local coordinates sites.
-    for (int index = get_global_id(0); index < NUM_LOCAL_COORDS; index += get_global_size(0)) {
-        int siteAtomIndex = localCoordsIndex[index];
-        int start = localCoordsStartIndex[index];
-        int end = localCoordsStartIndex[index+1];
-        mixed3 origin = 0, xdir = 0, ydir = 0;
-        for (int j = start; j < end; j++) {
-            mixed3 pos = loadPos(posq, posqCorrection, localCoordsAtoms[j]).xyz;
-            origin += pos*localCoordsWeights[3*j];
-            xdir += pos*localCoordsWeights[3*j+1];
-            ydir += pos*localCoordsWeights[3*j+2];
-        }
-        mixed3 zdir = cross(xdir, ydir);
-        mixed normXdir = sqrt(xdir.x*xdir.x+xdir.y*xdir.y+xdir.z*xdir.z);
-        mixed normZdir = sqrt(zdir.x*zdir.x+zdir.y*zdir.y+zdir.z*zdir.z);
-        mixed invNormXdir = (normXdir > 0 ? 1/normXdir : 0);
-        mixed invNormZdir = (normZdir > 0 ? 1/normZdir : 0);
-        mixed3 dx = xdir*invNormXdir;
-        mixed3 dz = zdir*invNormZdir;
-        mixed3 dy = cross(dz, dx);
-        mixed3 localPosition = convert_mixed4(localCoordsPos[index]).xyz;
-        // The derivatives for this case are very complicated.  They were computed with SymPy then simplified by hand.
-        real4 f = force[siteAtomIndex];
-        mixed3 fp1 = localPosition*f.x;
-        mixed3 fp2 = localPosition*f.y;
-        mixed3 fp3 = localPosition*f.z;
-        for (int j = start; j < end; j++) {
-            real originWeight = localCoordsWeights[3*j];
-            real wx = localCoordsWeights[3*j+1];
-            real wy = localCoordsWeights[3*j+2];
-            mixed wxScaled = wx*invNormXdir;
-            mixed t1 = (wx*ydir.x-wy*xdir.x)*invNormZdir;
-            mixed t2 = (wx*ydir.y-wy*xdir.y)*invNormZdir;
-            mixed t3 = (wx*ydir.z-wy*xdir.z)*invNormZdir;
-            mixed sx = t3*dz.y-t2*dz.z;
-            mixed sy = t1*dz.z-t3*dz.x;
-            mixed sz = t2*dz.x-t1*dz.y;
-            real4 fresult = 0;
-            fresult.x += fp1.x*wxScaled*(1-dx.x*dx.x) + fp1.z*(dz.x*sx   ) + fp1.y*((-dx.x*dy.x     )*wxScaled + dy.x*sx - dx.y*t2 - dx.z*t3) + f.x*originWeight;
-            fresult.y += fp1.x*wxScaled*( -dx.x*dx.y) + fp1.z*(dz.x*sy+t3) + fp1.y*((-dx.y*dy.x-dz.z)*wxScaled + dy.x*sy + dx.y*t1);
-            fresult.z += fp1.x*wxScaled*( -dx.x*dx.z) + fp1.z*(dz.x*sz-t2) + fp1.y*((-dx.z*dy.x+dz.y)*wxScaled + dy.x*sz + dx.z*t1);
-            fresult.x += fp2.x*wxScaled*( -dx.y*dx.x) + fp2.z*(dz.y*sx-t3) - fp2.y*(( dx.x*dy.y-dz.z)*wxScaled - dy.y*sx - dx.x*t2);
-            fresult.y += fp2.x*wxScaled*(1-dx.y*dx.y) + fp2.z*(dz.y*sy   ) - fp2.y*(( dx.y*dy.y     )*wxScaled - dy.y*sy + dx.x*t1 + dx.z*t3) + f.y*originWeight;
-            fresult.z += fp2.x*wxScaled*( -dx.y*dx.z) + fp2.z*(dz.y*sz+t1) - fp2.y*(( dx.z*dy.y+dz.x)*wxScaled - dy.y*sz - dx.z*t2);
-            fresult.x += fp3.x*wxScaled*( -dx.z*dx.x) + fp3.z*(dz.z*sx+t2) + fp3.y*((-dx.x*dy.z-dz.y)*wxScaled + dy.z*sx + dx.x*t3);
-            fresult.y += fp3.x*wxScaled*( -dx.z*dx.y) + fp3.z*(dz.z*sy-t1) + fp3.y*((-dx.y*dy.z+dz.x)*wxScaled + dy.z*sy + dx.y*t3);
-            fresult.z += fp3.x*wxScaled*(1-dx.z*dx.z) + fp3.z*(dz.z*sz   ) + fp3.y*((-dx.z*dy.z     )*wxScaled + dy.z*sz - dx.x*t1 - dx.y*t2) + f.z*originWeight;
-            ADD_FORCE(localCoordsAtoms[j], fresult);
-        }
-    }
-}
--- a/platforms/opencl/staticTarget/CMakeLists.txt
+++ b/platforms/opencl/staticTarget/CMakeLists.txt
@@ -4,16 +4,16 @@
 # INCLUDE(${CMAKE_CURRENT_SOURCE_DIR}/../FindOpenCL.cmake)
 INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
-FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
+FILE(GLOB OPENCL_KERNELS ${KERNEL_SOURCE_DIR}/kernels/*.cl)
-ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H}
+ADD_CUSTOM_COMMAND(OUTPUT ${KERNELS_CPP} ${KERNELS_H}
    COMMAND ${CMAKE_COMMAND}
-    ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_CURRENT_SOURCE_DIR}/../EncodeCLFiles.cmake
+    ARGS -D KERNEL_SOURCE_DIR=${KERNEL_SOURCE_DIR} -D KERNELS_CPP=${KERNELS_CPP} -D KERNELS_H=${KERNELS_H} -D KERNEL_SOURCE_CLASS=${KERNEL_SOURCE_CLASS} -D KERNEL_FILE_EXTENSION=cl -P ${CMAKE_SOURCE_DIR}/cmake_modules/EncodeKernelFiles.cmake
    DEPENDS ${OPENCL_KERNELS}
 )
-SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE)
+SET_SOURCE_FILES_PROPERTIES(${KERNELS_CPP} ${KERNELS_H} PROPERTIES GENERATED TRUE)
 ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
 TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}  ${OPENCL_LIBRARIES} ${PTHREADS_LIB_STATIC})
-SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY")
+SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_COMMON_BUILDING_STATIC_LIBRARY")
 INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
--- a/platforms/opencl/tests/TestOpenCLDeviceQuery.cpp
+++ b/platforms/opencl/tests/TestOpenCLDeviceQuery.cpp
@@ -47,7 +47,13 @@ int main() {
    for (int j = 0; j < platforms.size(); j++) {
        vector<cl::Device> devices;
-        platforms[j].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+        try {
+            platforms[j].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+        }
+        catch (...) {
+            // There are no devices available for this platform.
+            continue;
+        }
        for (int i = 0; i < devices.size(); i++) {
            cl::Device d = devices[i];

--- a/platforms/opencl/tests/TestOpenCLBAOABLangevinIntegrator.cpp
+++ b/platforms/opencl/tests/TestOpenCLBAOABLangevinIntegrator.cpp
@@ -30,7 +30,7 @@
 * -------------------------------------------------------------------------- */
 #include "OpenCLTests.h"
-#include "TestBAOABLangevinIntegrator.h"
+#include "TestLangevinMiddleIntegrator.h"
 void runPlatformTests() {
 }
--- a/platforms/opencl/tests/TestOpenCLNoseHooverIntegrator.cpp
+++ b/platforms/opencl/tests/TestOpenCLNoseHooverIntegrator.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Andreas Krämer and Andrew C. Simmmonett                           *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+#include "OpenCLTests.h"
+#include "TestNoseHooverIntegrator.h"
+void runPlatformTests() {
+}
--- a/platforms/opencl/tests/TestOpenCLNoseHooverThermostat.cpp
+++ b/platforms/opencl/tests/TestOpenCLNoseHooverThermostat.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Andreas Krämer and Andrew C. Simmonett                            *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+#include "OpenCLTests.h"
+#include "TestNoseHooverThermostat.h"
+void runPlatformTests() {
+}
--- a/platforms/reference/include/ReferenceConstraints.h
+++ b/platforms/reference/include/ReferenceConstraints.h
@@ -61,7 +61,7 @@ public:
     * Apply the constraint algorithm to velocities.
     * 
     * @param atomCoordinates  the atom coordinates
-     * @param atomCoordinatesP the velocities to modify
+     * @param velocities       the velocities to modify
     * @param inverseMasses    1/mass
     * @param tolerance        the constraint tolerance
     */

--- a/platforms/reference/include/ReferenceKernels.h
+++ b/platforms/reference/include/ReferenceKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2019 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2020 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -45,7 +45,7 @@ namespace OpenMM {
 class ReferenceObc;
 class ReferenceAndersenThermostat;
-class ReferenceBAOABDynamics;
+class ReferenceLangevinMiddleDynamics;
 class ReferenceCustomBondIxn;
 class ReferenceCustomAngleIxn;
 class ReferenceCustomTorsionIxn;
@@ -59,7 +59,9 @@ class ReferenceGayBerneForce;
 class ReferenceBrownianDynamics;
 class ReferenceStochasticDynamics;
 class ReferenceConstraintAlgorithm;
+class ReferenceNoseHooverChain;
 class ReferenceMonteCarloBarostat;
+class ReferenceVelocityVerletDynamics;
 class ReferenceVariableStochasticDynamics;
 class ReferenceVariableVerletDynamics;
 class ReferenceVerletDynamics;
@@ -629,7 +631,7 @@ private:
    std::map<std::pair<std::string, int>, std::array<double, 3> > particleParamOffsets, exceptionParamOffsets;
    double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, dispersionCoefficient;
    int kmax[3], gridSize[3], dispersionGridSize[3];
-    bool useSwitchingFunction;
+    bool useSwitchingFunction, exceptionsArePeriodic;
    std::vector<std::set<int> > exclusions;
    NonbondedMethod nonbondedMethod;
    NeighborList* neighborList;
@@ -1132,6 +1134,45 @@ private:
    ReferenceVerletDynamics* dynamics;
    std::vector<double> masses;
    double prevStepSize;
+}
+;
+/**
+ * This kernel is invoked by NoseHooverIntegrator to take one time step.
+ */
+class ReferenceIntegrateVelocityVerletStepKernel : public IntegrateVelocityVerletStepKernel {
+public:
+    ReferenceIntegrateVelocityVerletStepKernel(std::string name, const Platform& platform, ReferencePlatform::PlatformData& data) : IntegrateVelocityVerletStepKernel(name, platform),
+        data(data), dynamics(0) {
+    }
+    ~ReferenceIntegrateVelocityVerletStepKernel();
+    /**
+     * Initialize the kernel.
+     * 
+     * @param system     the System this kernel will be applied to
+     * @param integrator the NoseHooverIntegrator this kernel will be used for
+     */
+    void initialize(const System& system, const NoseHooverIntegrator& integrator);
+    /**
+     * Execute the kernel.
+     * 
+     * @param context    the context in which to execute this kernel
+     * @param integrator the VerletIntegrator this kernel is being used for
+     * @param forcesAreValid a reference to the parent integrator's boolean for keeping
+     *                       track of the validity of the current forces.
+     */
+    void execute(ContextImpl& context, const NoseHooverIntegrator& integrator, bool &forcesAreValid);
+    /**
+     * Compute the kinetic energy.
+     * 
+     * @param context    the context in which to execute this kernel
+     * @param integrator the NoseHooverIntegrator this kernel is being used for
+     */
+    double computeKineticEnergy(ContextImpl& context, const NoseHooverIntegrator& integrator);
+private:
+    ReferencePlatform::PlatformData& data;
+    ReferenceVelocityVerletDynamics* dynamics;
+    std::vector<double> masses;
+    double prevStepSize;
 };
 /**
@@ -1172,42 +1213,38 @@ private:
 };
 /**
- * This kernel is invoked by BAOABLangevinIntegrator to take one time step.
+ * This kernel is invoked by LangevinMiddleIntegrator to take one time step.
 */
-class ReferenceIntegrateBAOABStepKernel : public IntegrateBAOABStepKernel {
+class ReferenceIntegrateLangevinMiddleStepKernel : public IntegrateLangevinMiddleStepKernel {
 public:
-    ReferenceIntegrateBAOABStepKernel(std::string name, const Platform& platform, ReferencePlatform::PlatformData& data) : IntegrateBAOABStepKernel(name, platform),
+    ReferenceIntegrateLangevinMiddleStepKernel(std::string name, const Platform& platform, ReferencePlatform::PlatformData& data) : IntegrateLangevinMiddleStepKernel(name, platform),
        data(data), dynamics(0) {
    }
-    ~ReferenceIntegrateBAOABStepKernel();
+    ~ReferenceIntegrateLangevinMiddleStepKernel();
    /**
     * Initialize the kernel, setting up the particle masses.
     * 
     * @param system     the System this kernel will be applied to
-     * @param integrator the BAOABLangevinIntegrator this kernel will be used for
+     * @param integrator the LangevinMiddleIntegrator this kernel will be used for
     */
-    void initialize(const System& system, const BAOABLangevinIntegrator& integrator);
+    void initialize(const System& system, const LangevinMiddleIntegrator& integrator);
    /**
     * Execute the kernel.
     * 
     * @param context    the context in which to execute this kernel
-     * @param integrator the BAOABLangevinIntegrator this kernel is being used for
+     * @param integrator the LangevinMiddleIntegrator this kernel is being used for
-     * @param forcesAreValid if the context has been modified since the last time step, this will be
-     *                       false to show that cached forces are invalid and must be recalculated.
-     *                       On exit, this should specify whether the cached forces are valid at the
-     *                       end of the step.
     */
-    void execute(ContextImpl& context, const BAOABLangevinIntegrator& integrator, bool& forcesAreValid);
+    void execute(ContextImpl& context, const LangevinMiddleIntegrator& integrator);
    /**
     * Compute the kinetic energy.
     * 
     * @param context    the context in which to execute this kernel
-     * @param integrator the BAOABLangevinIntegrator this kernel is being used for
+     * @param integrator the LangevinMiddleIntegrator this kernel is being used for
     */
-    double computeKineticEnergy(ContextImpl& context, const BAOABLangevinIntegrator& integrator);
+    double computeKineticEnergy(ContextImpl& context, const LangevinMiddleIntegrator& integrator);
 private:
    ReferencePlatform::PlatformData& data;
-    ReferenceBAOABDynamics* dynamics;
+    ReferenceLangevinMiddleDynamics* dynamics;
    std::vector<double> masses;
    double prevTemp, prevFriction, prevStepSize;
 };
@@ -1429,6 +1466,62 @@ private:
    std::vector<double> masses;
 };
+/**
+ * This kernel is invoked by NoseHooverChain at the start of each time step to adjust the thermostat
+ * and update the associated particle velocities.
+ */
+class ReferenceNoseHooverChainKernel : public NoseHooverChainKernel {
+public:
+    ReferenceNoseHooverChainKernel(std::string name, const Platform& platform) : NoseHooverChainKernel(name, platform), chainPropagator(0) {
+    }
+    ~ReferenceNoseHooverChainKernel();
+    /**
+     * Initialize the kernel.
+     */
+    virtual void initialize();
+    /**
+     * Execute the kernel that propagates the Nose Hoover chain and determines the velocity scale factor.
+     * 
+     * @param context  the context in which to execute this kernel
+     * @param noseHooverChain the object describing the chain to be propagated.
+     * @param kineticEnergy the {absolute, relative} kinetic energies of the particles being thermostated by this chain.
+     * @param timeStep the time step used by the integrator.
+     * @return the velocity scale factors to apply to the {absolute, relative} motion of particles associated with this heat bath.
+     */
+    virtual std::pair<double, double> propagateChain(ContextImpl& context, const NoseHooverChain &nhc, std::pair<double, double> kineticEnergy, double timeStep);
+    /**
+     * Execute the kernal that computes the total (kinetic + potential) heat bath energy.
+     *
+     * @param context the context in which to execute this kernel
+     * @param noseHooverChain the chain whose energy is to be determined.
+     * @return the total heat bath energy.
+     */
+    virtual double computeHeatBathEnergy(ContextImpl& context, const NoseHooverChain &nhc);
+    /**
+     * Execute the kernel that computes the kinetic energy for a subset of atoms,
+     * or the relative kinetic energy of Drude particles with respect to their parent atoms
+     *
+     * @param context the context in which to execute this kernel
+     * @param noseHooverChain the chain whose energy is to be determined.
+     * @param downloadValue whether the computed value should be downloaded and returned.
+     *
+     */
+     virtual std::pair<double, double> computeMaskedKineticEnergy(ContextImpl& context, const NoseHooverChain &noseHooverChain, bool downloadValue);
+    /**
+     * Execute the kernel that scales the velocities of particles associated with a nose hoover chain
+     *
+     * @param context the context in which to execute this kernel
+     * @param noseHooverChain the chain whose energy is to be determined.
+     * @param scaleFactor the multiplicative factor by which {absolute, relative} velocities are scaled.
+     */
+    virtual void scaleVelocities(ContextImpl& context, const NoseHooverChain &noseHooverChain, std::pair<double, double> scaleFactor);
+private:
+    ReferenceNoseHooverChain* chainPropagator;
+};
 /**
 * This kernel is invoked by MonteCarloBarostat to adjust the periodic box volume
 */

--- a/platforms/reference/include/ReferenceLJCoulomb14.h
+++ b/platforms/reference/include/ReferenceLJCoulomb14.h
@@ -32,40 +32,53 @@ namespace OpenMM {
 class OPENMM_EXPORT ReferenceLJCoulomb14 : public ReferenceBondIxn {
-   public:
+public:
-      /**---------------------------------------------------------------------------------------
-         Constructor
-         --------------------------------------------------------------------------------------- */
-       ReferenceLJCoulomb14();
-      /**---------------------------------------------------------------------------------------
-         Destructor
-         --------------------------------------------------------------------------------------- */
-       ~ReferenceLJCoulomb14();
-      /**---------------------------------------------------------------------------------------
-         Calculate Ryckaert-Bellemans bond ixn
-         @param atomIndices      atom indices of 4 atoms in bond
-         @param atomCoordinates  atom coordinates
-         @param parameters       six RB parameters
-         @param forces           force array (forces added to current values)
-         @param totalEnergy      if not null, the energy will be added to this
-         --------------------------------------------------------------------------------------- */
-      void calculateBondIxn(std::vector<int>& atomIndices, std::vector<OpenMM::Vec3>& atomCoordinates,
-                            std::vector<double>& parameters, std::vector<OpenMM::Vec3>& forces,
-                            double* totalEnergy, double* energyParamDerivs);
+    /**---------------------------------------------------------------------------------------
+       Constructor
+       --------------------------------------------------------------------------------------- */
+     ReferenceLJCoulomb14();
+    /**---------------------------------------------------------------------------------------
+       Destructor
+       --------------------------------------------------------------------------------------- */
+     ~ReferenceLJCoulomb14();
+     /**---------------------------------------------------------------------------------------
+       Set the force to use periodic boundary conditions.
+       @param vectors    the vectors defining the periodic box
+       --------------------------------------------------------------------------------------- */
+    void setPeriodic(OpenMM::Vec3* vectors);
+    /**---------------------------------------------------------------------------------------
+       Calculate nonbonded 1-4 interactinos
+       @param atomIndices      atom indices of the atoms in each pair
+       @param atomCoordinates  atom coordinates
+       @param parameters       (sigma, 4*epsilon, charge product) for each pair
+       @param forces           force array (forces added to current values)
+       @param totalEnergy      if not null, the energy will be added to this
+       --------------------------------------------------------------------------------------- */
+    void calculateBondIxn(std::vector<int>& atomIndices, std::vector<OpenMM::Vec3>& atomCoordinates,
+                          std::vector<double>& parameters, std::vector<OpenMM::Vec3>& forces,
+                          double* totalEnergy, double* energyParamDerivs);
+private:
+    bool periodic;
+    OpenMM::Vec3 periodicBoxVectors[3];
 };
 } // namespace OpenMM

--- a/platforms/reference/include/ReferenceBAOABDynamics.h
+++ b/platforms/reference/include/ReferenceBAOABDynamics.h
-/* Portions copyright (c) 2006-2019 Stanford University and Simbios.
+/* Portions copyright (c) 2006-2020 Stanford University and Simbios.
 * Contributors: Pande Group
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -22,8 +22,8 @@
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
-#ifndef __ReferenceBAOABDynamics_H__
+#ifndef __ReferenceLangevinMiddleDynamics_H__
-#define __ReferenceBAOABDynamics_H__
+#define __ReferenceLangevinMiddleDynamics_H__
 #include "ReferenceDynamics.h"
 #include "openmm/internal/ContextImpl.h"
@@ -31,7 +31,7 @@
 namespace OpenMM {
-class OPENMM_EXPORT ReferenceBAOABDynamics : public ReferenceDynamics {
+class OPENMM_EXPORT ReferenceLangevinMiddleDynamics : public ReferenceDynamics {
   protected:
@@ -52,7 +52,7 @@ class OPENMM_EXPORT ReferenceBAOABDynamics : public ReferenceDynamics {
         --------------------------------------------------------------------------------------- */
-       ReferenceBAOABDynamics(int numberOfAtoms, double deltaT, double friction, double temperature);
+       ReferenceLangevinMiddleDynamics(int numberOfAtoms, double deltaT, double friction, double temperature);
      /**---------------------------------------------------------------------------------------
@@ -60,7 +60,7 @@ class OPENMM_EXPORT ReferenceBAOABDynamics : public ReferenceDynamics {
         --------------------------------------------------------------------------------------- */
-       ~ReferenceBAOABDynamics();
+       ~ReferenceLangevinMiddleDynamics();
      /**---------------------------------------------------------------------------------------
@@ -79,29 +79,25 @@ class OPENMM_EXPORT ReferenceBAOABDynamics : public ReferenceDynamics {
         @param atomCoordinates     atom coordinates
         @param velocities          velocities
         @param masses              atom masses
-         @param forcesAreValid      whether the current forces are valid or need to be recomputed
         @param tolerance           the constraint tolerance
         --------------------------------------------------------------------------------------- */
      void update(OpenMM::ContextImpl& context, std::vector<OpenMM::Vec3>& atomCoordinates,
-                  std::vector<OpenMM::Vec3>& velocities, std::vector<double>& masses, bool& forcesAreValid, double tolerance);
+                  std::vector<OpenMM::Vec3>& velocities, std::vector<double>& masses, double tolerance);
      /**---------------------------------------------------------------------------------------
         First update; based on code in update.c do_update_sd() Gromacs 3.1.4
         @param numberOfAtoms       number of atoms
-         @param atomCoordinates     atom coordinates
         @param velocities          velocities
         @param forces              forces
         @param inverseMasses       inverse atom masses
-         @param xPrime              xPrime
         --------------------------------------------------------------------------------------- */
-      virtual void updatePart1(int numberOfAtoms, std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<OpenMM::Vec3>& velocities,
+      virtual void updatePart1(int numberOfAtoms, std::vector<OpenMM::Vec3>& velocities,  std::vector<OpenMM::Vec3>& forces, std::vector<double>& inverseMasses);
-                       std::vector<OpenMM::Vec3>& forces, std::vector<double>& inverseMasses, std::vector<OpenMM::Vec3>& xPrime);
      /**---------------------------------------------------------------------------------------
@@ -116,7 +112,7 @@ class OPENMM_EXPORT ReferenceBAOABDynamics : public ReferenceDynamics {
         --------------------------------------------------------------------------------------- */
      virtual void updatePart2(int numberOfAtoms, std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<OpenMM::Vec3>& velocities,
-                       std::vector<double>& inverseMasses, std::vector<OpenMM::Vec3>& xPrime);
+                               std::vector<double>& inverseMasses, std::vector<OpenMM::Vec3>& xPrime);
      /**---------------------------------------------------------------------------------------
@@ -126,16 +122,15 @@ class OPENMM_EXPORT ReferenceBAOABDynamics : public ReferenceDynamics {
         @param numberOfAtoms       number of atoms
         @param atomCoordinates     atom coordinates
         @param velocities          velocities
-         @param forces              forces
         @param inverseMasses       inverse atom masses
         @param xPrime              xPrime
         --------------------------------------------------------------------------------------- */
      virtual void updatePart3(OpenMM::ContextImpl& context, int numberOfAtoms, std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<OpenMM::Vec3>& velocities,
-                       std::vector<OpenMM::Vec3>& forces, std::vector<double>& inverseMasses, std::vector<OpenMM::Vec3>& xPrime);
+                               std::vector<double>& inverseMasses, std::vector<OpenMM::Vec3>& xPrime);
 };
 } // namespace OpenMM
-#endif // __ReferenceBAOABDynamics_H__
+#endif // __ReferenceLangevinMiddleDynamics_H__
--- a/platforms/reference/include/ReferenceNoseHooverChain.h
+++ b/platforms/reference/include/ReferenceNoseHooverChain.h
+/* Portions copyright (c) 2019 Stanford University and Simbios.
+ * Contributors: Andreas Krämer and Andrew C. Simmonett
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject
+ * to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __ReferenceNoseHooverChain_H__
+#define __ReferenceNoseHooverChain_H__
+#include "openmm/Vec3.h"
+#include <vector>
+namespace OpenMM {
+using std::vector;
+class ReferenceNoseHooverChain {
+   private:
+   public:
+      /**---------------------------------------------------------------------------------------
+         Constructor
+         --------------------------------------------------------------------------------------- */
+       ReferenceNoseHooverChain();
+      /**---------------------------------------------------------------------------------------
+         Destructor
+         --------------------------------------------------------------------------------------- */
+       ~ReferenceNoseHooverChain();
+      /**---------------------------------------------------------------------------------------
+         Propagate the Nose-Hoover chain a half timestep and find the appropriate velocity scaling
+         @param kineticEnergy      the instantaneous kinetic energy of the particles being thermostated
+         @param chainVelocities    the velocities of the chain's beads in nm / ps
+         @param chainPositions     the positions of the chains's beads in nm
+         @param numDOFs            the number of degrees of freedom in the system that this chain thermostats
+         @param temperature        thermostat temperature in Kelvin
+         @param collisionFrequency collision frequency for each atom in ps^-1
+         @param timeStep           full integration step size in ps (this only propagates half way)
+         @param numMTS             number of multi timestep increments used in the Trotter expansion
+         @param YSWeights          vector of weights used in the Yoshida-Suzuki multi-timestepping.
+         --------------------------------------------------------------------------------------- */
+      double propagate(double kineticEnergy, vector<double>& chainVelocities,
+                       vector<double>& chainPositions, int numDOFs,
+                       double temperature, double collisionFrequency, double timeStep,
+                       int numMTS, const vector<double>& YSWeights) const;
+};
+} // namespace OpenMM
+#endif // __ReferenceNoseHooverChain_H__
--- a/platforms/reference/include/ReferencePlatform.h
+++ b/platforms/reference/include/ReferencePlatform.h
@@ -72,6 +72,8 @@ public:
    Vec3* periodicBoxVectors;
    ReferenceConstraints* constraints;
    std::map<std::string, double>* energyParameterDerivatives;
+    std::vector<std::vector<double>>* noseHooverPositions;
+    std::vector<std::vector<double>>* noseHooverVelocities;
 };
 } // namespace OpenMM

--- a/platforms/reference/include/ReferenceSETTLEAlgorithm.h
+++ b/platforms/reference/include/ReferenceSETTLEAlgorithm.h
@@ -75,7 +75,7 @@ public:
     * Apply the constraint algorithm to velocities.
     * 
     * @param atomCoordinates  the atom coordinates
-     * @param atomCoordinatesP the velocities to modify
+     * @param velocities       the velocities to modify
     * @param inverseMasses    1/mass
     * @param tolerance        the constraint tolerance
     */

--- a/platforms/reference/include/ReferenceVelocityVerletDynamics.h
+++ b/platforms/reference/include/ReferenceVelocityVerletDynamics.h
+/* Portions copyright (c) 2006-2012 Stanford University and Simbios.
+ * Contributors: Pande Group
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject
+ * to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __ReferenceVelocityVerletDynamics_H__
+#define __ReferenceVelocityVerletDynamics_H__
+#include "ReferenceDynamics.h"
+namespace OpenMM {
+class ContextImpl;
+class ReferenceVelocityVerletDynamics : public ReferenceDynamics {
+   private:
+      std::vector<OpenMM::Vec3> xPrime;
+      std::vector<double> inverseMasses;
+   public:
+      /**---------------------------------------------------------------------------------------
+         Constructor
+         @param numberOfAtoms  number of atoms
+         @param deltaT         delta t for dynamics
+         @param friction       friction coefficient
+         @param temperature    temperature
+         --------------------------------------------------------------------------------------- */
+       ReferenceVelocityVerletDynamics(int numberOfAtoms, double deltaT);
+      /**---------------------------------------------------------------------------------------
+         Destructor
+         --------------------------------------------------------------------------------------- */
+       ~ReferenceVelocityVerletDynamics();
+      /**---------------------------------------------------------------------------------------
+         Update
+         @param system              the System to be integrated
+         @param atomCoordinates     atom coordinates
+         @param velocities          velocities
+         @param forces              forces
+         @param masses              atom masses
+         @param tolerance           the constraint tolerance
+         @param forcesAreValid      whether the forces are valid (duh!)
+         @param allAtoms            a list of all atoms not involved in a Drude-like pair
+         @param allPairs            a list of all Drude-like pairs, and their KT values, in the system
+         @param maxPairDistance     the maximum separation allowed for a Drude-like pair
+         --------------------------------------------------------------------------------------- */
+      void update(OpenMM::ContextImpl &context, const OpenMM::System& system, std::vector<OpenMM::Vec3>& atomCoordinates,
+                  std::vector<OpenMM::Vec3>& velocities, std::vector<OpenMM::Vec3>& forces, std::vector<double>& masses, double tolerance, bool &forcesAreValid,
+                  const std::vector<int> & allAtoms, const std::vector<std::tuple<int, int, double>> & allPairs, double maxPairDistance);
+};
+} // namespace OpenMM
+#endif // __ReferenceVelocityVerletDynamics_H__