Continuing to implement OpenCL CustomIntegrator: added velocity constraints...

Continuing to implement OpenCL CustomIntegrator: added velocity constraints (not yet fully debugged)

Continuing to implement OpenCL CustomIntegrator: added velocity constraints...
Continuing to implement OpenCL CustomIntegrator: added velocity constraints (not yet fully debugged)
12c95e78 · Peter Eastman · 08021b4e · 12c95e78 · 12c95e78 · 12c95e78
Commit 12c95e78 authored Jan 11, 2012 by Peter Eastman
7 changed files
--- a/platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+++ b/platforms/opencl/src/OpenCLIntegrationUtilities.cpp
@@ -91,7 +91,7 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
        random(NULL), randomSeed(NULL), randomPos(0), stepSize(NULL), ccmaAtoms(NULL), ccmaDistance(NULL),
        ccmaReducedMass(NULL), ccmaAtomConstraints(NULL), ccmaNumAtomConstraints(NULL), ccmaConstraintMatrixColumn(NULL),
        ccmaConstraintMatrixValue(NULL), ccmaDelta1(NULL), ccmaDelta2(NULL), ccmaConverged(NULL),
-        ccmaConvergedBuffer(NULL), hasInitializedConstraintKernels(false) {
+        ccmaConvergedBuffer(NULL), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false) {
    // Create workspace arrays.

    posDelta = new OpenCLArray<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
@@ -101,10 +101,15 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c

    // Create kernels for enforcing constraints.

+    map<string, string> velocityDefines;
+    velocityDefines["CONSTRAIN_VELOCITIES"] = "1";
    cl::Program settleProgram = context.createProgram(OpenCLKernelSources::settle);
-    settleKernel = cl::Kernel(settleProgram, "applySettle");
+    settlePosKernel = cl::Kernel(settleProgram, "applySettle");
+    settleVelKernel = cl::Kernel(settleProgram, "constrainVelocities");
    cl::Program shakeProgram = context.createProgram(OpenCLKernelSources::shakeHydrogens);
-    shakeKernel = cl::Kernel(shakeProgram, "applyShakeToHydrogens");
+    shakePosKernel = cl::Kernel(shakeProgram, "applyShakeToHydrogens");
+    shakeProgram = context.createProgram(OpenCLKernelSources::shakeHydrogens, velocityDefines);
+    shakeVelKernel = cl::Kernel(shakeProgram, "applyShakeToHydrogens");

    // Record the set of constraints and how many constraints each atom is involved in.

@@ -502,9 +507,13 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
        defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(numAtoms);
        cl::Program ccmaProgram = context.createProgram(OpenCLKernelSources::ccma, defines);
        ccmaDirectionsKernel = cl::Kernel(ccmaProgram, "computeConstraintDirections");
-        ccmaForceKernel = cl::Kernel(ccmaProgram, "computeConstraintForce");
+        ccmaPosForceKernel = cl::Kernel(ccmaProgram, "computeConstraintForce");
        ccmaMultiplyKernel = cl::Kernel(ccmaProgram, "multiplyByConstraintMatrix");
-        ccmaUpdateKernel = cl::Kernel(ccmaProgram, "updateAtomPositions");
+        ccmaPosUpdateKernel = cl::Kernel(ccmaProgram, "updateAtomPositions");
+        defines["CONSTRAIN_VELOCITIES"] = "1";
+        ccmaProgram = context.createProgram(OpenCLKernelSources::ccma, defines);
+        ccmaVelForceKernel = cl::Kernel(ccmaProgram, "computeConstraintForce");
+        ccmaVelUpdateKernel = cl::Kernel(ccmaProgram, "updateAtomPositions");
    }
 }

@@ -550,39 +559,63 @@ OpenCLIntegrationUtilities::~OpenCLIntegrationUtilities() {
 }

 void OpenCLIntegrationUtilities::applyConstraints(double tol) {
+    applyConstraints(false, tol);
+}
+
+void OpenCLIntegrationUtilities::applyVelocityConstraints(double tol) {
+    applyConstraints(true, tol);
+}
+
+void OpenCLIntegrationUtilities::applyConstraints(bool constrainVelocities, double tol) {
+    bool hasInitialized;
+    cl::Kernel settleKernel, shakeKernel, ccmaForceKernel, ccmaUpdateKernel;
+    if (constrainVelocities) {
+        hasInitialized = hasInitializedVelConstraintKernels;
+        settleKernel = settleVelKernel;
+        shakeKernel = shakeVelKernel;
+        ccmaForceKernel = ccmaVelForceKernel;
+        ccmaUpdateKernel = ccmaVelUpdateKernel;
+        hasInitializedVelConstraintKernels = true;
+    }
+    else {
+        hasInitialized = hasInitializedPosConstraintKernels;
+        settleKernel = settlePosKernel;
+        shakeKernel = shakePosKernel;
+        ccmaForceKernel = ccmaPosForceKernel;
+        ccmaUpdateKernel = ccmaPosUpdateKernel;
+        hasInitializedPosConstraintKernels = true;
+    }
    if (settleAtoms != NULL) {
-        if (!hasInitializedConstraintKernels) {
+        if (!hasInitialized) {
            settleKernel.setArg<cl_int>(0, settleAtoms->getSize());
            settleKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
            settleKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer());
-            settleKernel.setArg<cl::Buffer>(4, posDelta->getDeviceBuffer());
-            settleKernel.setArg<cl::Buffer>(5, context.getVelm().getDeviceBuffer());
-            settleKernel.setArg<cl::Buffer>(6, settleAtoms->getDeviceBuffer());
-            settleKernel.setArg<cl::Buffer>(7, settleParams->getDeviceBuffer());
+            settleKernel.setArg<cl::Buffer>(4, context.getVelm().getDeviceBuffer());
+            settleKernel.setArg<cl::Buffer>(5, settleAtoms->getDeviceBuffer());
+            settleKernel.setArg<cl::Buffer>(6, settleParams->getDeviceBuffer());
        }
        settleKernel.setArg<cl_float>(1, (cl_float) tol);
        context.executeKernel(settleKernel, settleAtoms->getSize());
    }
    if (shakeAtoms != NULL) {
-        if (!hasInitializedConstraintKernels) {
+        if (!hasInitialized) {
            shakeKernel.setArg<cl_int>(0, shakeAtoms->getSize());
            shakeKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
-            shakeKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer());
-            shakeKernel.setArg<cl::Buffer>(4, posDelta->getDeviceBuffer());
-            shakeKernel.setArg<cl::Buffer>(5, shakeAtoms->getDeviceBuffer());
-            shakeKernel.setArg<cl::Buffer>(6, shakeParams->getDeviceBuffer());
+            shakeKernel.setArg<cl::Buffer>(3, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer());
+            shakeKernel.setArg<cl::Buffer>(4, shakeAtoms->getDeviceBuffer());
+            shakeKernel.setArg<cl::Buffer>(5, shakeParams->getDeviceBuffer());
        }
        shakeKernel.setArg<cl_float>(1, (cl_float) tol);
        context.executeKernel(shakeKernel, shakeAtoms->getSize());
    }
    if (ccmaAtoms != NULL) {
-        if (!hasInitializedConstraintKernels) {
+        if (!hasInitialized) {
            ccmaDirectionsKernel.setArg<cl::Buffer>(0, ccmaAtoms->getDeviceBuffer());
            ccmaDirectionsKernel.setArg<cl::Buffer>(1, ccmaDistance->getDeviceBuffer());
            ccmaDirectionsKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
            ccmaForceKernel.setArg<cl::Buffer>(0, ccmaAtoms->getDeviceBuffer());
            ccmaForceKernel.setArg<cl::Buffer>(1, ccmaDistance->getDeviceBuffer());
-            ccmaForceKernel.setArg<cl::Buffer>(2, posDelta->getDeviceBuffer());
+            ccmaForceKernel.setArg<cl::Buffer>(2, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer());
            ccmaForceKernel.setArg<cl::Buffer>(3, ccmaReducedMass->getDeviceBuffer());
            ccmaForceKernel.setArg<cl::Buffer>(4, ccmaDelta1->getDeviceBuffer());
            ccmaForceKernel.setArg<cl::Buffer>(5, ccmaConverged->getDeviceBuffer());
@@ -594,7 +627,7 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
            ccmaUpdateKernel.setArg<cl::Buffer>(0, ccmaNumAtomConstraints->getDeviceBuffer());
            ccmaUpdateKernel.setArg<cl::Buffer>(1, ccmaAtomConstraints->getDeviceBuffer());
            ccmaUpdateKernel.setArg<cl::Buffer>(2, ccmaDistance->getDeviceBuffer());
-            ccmaUpdateKernel.setArg<cl::Buffer>(3, posDelta->getDeviceBuffer());
+            ccmaUpdateKernel.setArg<cl::Buffer>(3, constrainVelocities ? context.getVelm().getDeviceBuffer() : posDelta->getDeviceBuffer());
            ccmaUpdateKernel.setArg<cl::Buffer>(4, context.getVelm().getDeviceBuffer());
            ccmaUpdateKernel.setArg<cl::Buffer>(5, ccmaDelta1->getDeviceBuffer());
            ccmaUpdateKernel.setArg<cl::Buffer>(6, ccmaDelta2->getDeviceBuffer());
@@ -625,7 +658,6 @@ void OpenCLIntegrationUtilities::applyConstraints(double tol) {
            }
        }
    }
-    hasInitializedConstraintKernels = true;
 }

 void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) {

--- a/platforms/opencl/src/OpenCLIntegrationUtilities.h
+++ b/platforms/opencl/src/OpenCLIntegrationUtilities.h
@@ -67,6 +67,12 @@ public:
     * @param tol             the constraint tolerance
     */
    void applyConstraints(double tol);
+    /**
+     * Apply constraints to the atom velocities.
+     *
+     * @param tol             the constraint tolerance
+     */
+    void applyVelocityConstraints(double tol);
    /**
     * Initialize the random number generator.
     */
@@ -79,13 +85,14 @@ public:
     */
    int prepareRandomNumbers(int numValues);
 private:
+    void applyConstraints(bool constrainVelocities, double tol);
    OpenCLContext& context;
-    cl::Kernel settleKernel;
-    cl::Kernel shakeKernel;
+    cl::Kernel settlePosKernel, settleVelKernel;
+    cl::Kernel shakePosKernel, shakeVelKernel;
    cl::Kernel ccmaDirectionsKernel;
-    cl::Kernel ccmaForceKernel;
+    cl::Kernel ccmaPosForceKernel, ccmaVelForceKernel;
    cl::Kernel ccmaMultiplyKernel;
-    cl::Kernel ccmaUpdateKernel;
+    cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel;
    cl::Kernel randomKernel;
    OpenCLArray<mm_float4>* posDelta;
    OpenCLArray<mm_int4>* settleAtoms;
@@ -109,7 +116,7 @@ private:
    cl_int* ccmaConvergedMemory;
    int randomPos;
    int lastSeed;
-    bool hasInitializedConstraintKernels;
+    bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels;
    struct ShakeCluster;
    struct ConstraintOrderer;
 };

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -3857,6 +3857,9 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
            cl.getIntegrationUtilities().applyConstraints(integrator.getConstraintTolerance());
            cl.executeKernel(kernels[i][0], numAtoms);
        }
+        else if (stepType[i] == CustomIntegrator::ConstrainVelocities) {
+            cl.getIntegrationUtilities().applyVelocityConstraints(integrator.getConstraintTolerance());
+        }
        if (invalidatesForces[i])
            forcesAreValid = false;
    }

--- a/platforms/opencl/src/kernels/ccma.cl
+++ b/platforms/opencl/src/kernels/ccma.cl
@@ -38,12 +38,24 @@ __kernel void computeConstraintForce(__global const int2* restrict constraintAto
        int2 atoms = constraintAtoms[index];
        float4 dir = constraintDistance[index];
        float4 rp_ij = atomPositions[atoms.x]-atomPositions[atoms.y];
+#ifndef CONSTRAIN_VELOCITIES
        rp_ij.xyz += dir.xyz;
+#endif
+        float rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
+        float d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
+#ifdef CONSTRAIN_VELOCITIES
+        delta1[index] = -2.0f*reducedMass[index]*rrpr/d_ij2;
+
+        // See whether it has converged.
+
+        if (groupConverged && fabs(delta1[index]) > tol) {
+            groupConverged = 0;
+            converged[iteration%2] = 0;
+        }
+#else
        float rp2 = rp_ij.x*rp_ij.x + rp_ij.y*rp_ij.y + rp_ij.z*rp_ij.z;
        float dist2 = dir.w*dir.w;
        float diff = dist2 - rp2;
-        float rrpr = rp_ij.x*dir.x + rp_ij.y*dir.y + rp_ij.z*dir.z;
-        float d_ij2 = dir.x*dir.x + dir.y*dir.y + dir.z*dir.z;
        delta1[index] = (rrpr > d_ij2*1e-6f ? reducedMass[index]*diff/rrpr : 0.0f);

        // See whether it has converged.
@@ -52,6 +64,7 @@ __kernel void computeConstraintForce(__global const int2* restrict constraintAto
            groupConverged = 0;
            converged[iteration%2] = 0;
        }
+#endif
    }
 }


--- a/platforms/opencl/src/kernels/settle.cl
+++ b/platforms/opencl/src/kernels/settle.cl
@@ -2,7 +2,7 @@
 * Enforce constraints on SETTLE clusters
 */

-__kernel void applySettle(int numClusters, float tol, __global const float4* restrict oldPos, __global const float4* restrict posDelta, __global float4* restrict newDelta, __global const float4* restrict velm, __global const int4* restrict clusterAtoms, __global const float2* restrict clusterParams) {
+__kernel void applySettle(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global const float4* restrict velm, __global const int4* restrict clusterAtoms, __global const float2* restrict clusterParams) {
    int index = get_global_id(0);
    while (index < numClusters) {
        // Load the data for this cluster.
@@ -151,9 +151,77 @@ __kernel void applySettle(int numClusters, float tol, __global const float4* res

        // Record the new positions.

-        newDelta[atoms.x] = xp0;
-        newDelta[atoms.y] = xp1;
-        newDelta[atoms.z] = xp2;
+        posDelta[atoms.x] = xp0;
+        posDelta[atoms.y] = xp1;
+        posDelta[atoms.z] = xp2;
        index += get_global_size(0);
    }
 }
+
+/**
+ * Enforce velocity constraints on SETTLE clusters
+ */
+
+__kernel void constrainVelocities(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global float4* restrict velm, __global const int4* restrict clusterAtoms, __global const float2* restrict clusterParams) {
+    for (int index = get_global_id(0); index < numClusters; index++) {
+        // Load the data for this cluster.
+
+        int4 atoms = clusterAtoms[index];
+        float4 apos0 = oldPos[atoms.x];
+        float4 apos1 = oldPos[atoms.y];
+        float4 apos2 = oldPos[atoms.z];
+        float4 v0 = velm[atoms.x];
+        float4 v1 = velm[atoms.y];
+        float4 v2 = velm[atoms.z];
+        float m0 = RECIP(v0.w);
+        float m1 = RECIP(v1.w);
+        float m2 = RECIP(v2.w);
+        
+        // Compute offsets between atoms.
+        
+        float4 deltax1 = apos1-apos0;
+        float4 deltax2 = apos2-apos0;
+        float4 deltav1 = v1-v0;
+        float4 deltav2 = v2-v0;
+        
+        // Compute linear and angular momentum, and the inertia tensor.
+        
+        float4 p = v0*m0 + v1*m1 + v2*m2;
+        float4 L = m1*cross(deltax1, deltav1) + m2*cross(deltax2, deltav2);
+        float Ixx = m1*(deltax1.y*deltax1.y+deltax1.z*deltax1.z) + m2*(deltax2.y*deltax2.y+deltax2.z*deltax2.z);
+        float Iyy = m1*(deltax1.x*deltax1.x+deltax1.z*deltax1.z) + m2*(deltax2.x*deltax2.x+deltax2.z*deltax2.z);
+        float Izz = m1*(deltax1.x*deltax1.x+deltax1.y*deltax1.y) + m2*(deltax2.x*deltax2.x+deltax2.y*deltax2.y);
+        float Ixy = m1*deltax1.x*deltax1.y + m2*deltax2.x*deltax2.y;
+        float Ixz = m1*deltax1.x*deltax1.z + m2*deltax2.x*deltax2.z;
+        float Iyz = m1*deltax1.y*deltax1.z + m2*deltax2.y*deltax2.z;
+        float Iyx = Ixy;
+        float Izx = Ixz;
+        float Izy = Iyz;
+        
+        // Invert the inertia tensor and use it to compute the angular velocity.
+        
+        float Axx = Iyy*Izz - Iyz*Izy;
+        float Axy = Iyz*Izx - Iyx*Izz;
+        float Axz = Iyx*Izy - Iyy*Izx;
+        float Ayx = Izy*Ixz - Izz*Ixy;
+        float Ayy = Izz*Ixx - Izx*Ixz;
+        float Ayz = Izx*Ixy - Izy*Ixx;
+        float Azx = Ixy*Iyz - Ixz*Iyy;
+        float Azy = Ixz*Iyx - Ixx*Iyz;
+        float Azz = Ixx*Iyy - Ixy*Iyx;
+        float invDet = 1.0f/(Ixx*Axx + Ixy*Axy + Ixz*Axz);
+        float4 w = (float4) (invDet*(L.x*Axx + L.y*Axy + L.z*Axz),
+                             invDet*(L.x*Ayx + L.y*Ayy + L.z*Ayz),
+                             invDet*(L.x*Azx + L.y*Azy + L.z*Azz), 0.0f);
+        
+        // Compute the particle velocities from the molecule's linear and angular velocities.
+        
+        float4 v = p/(m0+m1+m2);
+        v0.xyz = v.xyz;
+        v1.xyz = v.xyz+cross(deltax1, w).xyz;
+        v2.xyz = v.xyz+cross(deltax2, w).xyz;
+        velm[atoms.x] = v0;
+        velm[atoms.y] = v1;
+        velm[atoms.z] = v2;
+    }
+}
\ No newline at end of file
--- a/platforms/opencl/src/kernels/shakeHydrogens.cl
+++ b/platforms/opencl/src/kernels/shakeHydrogens.cl
@@ -2,7 +2,7 @@
 * Enforce constraints on SHAKE clusters
 */

-__kernel void applyShakeToHydrogens(int numClusters, float tol, __global const float4* restrict oldPos, __global const float4* restrict posDelta, __global float4* restrict newDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
+__kernel void applyShakeToHydrogens(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) {
    int index = get_global_id(0);
    while (index < numClusters) {
        // Load the data for this cluster.
@@ -48,6 +48,36 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
        int iteration = 0;
        while (iteration < 15 && !converged) {
            converged = true;
+#ifdef CONSTRAIN_VELOCITIES
+            float4 rpij = xpi-xpj1;
+            float rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z;
+            float delta = -2.0f*avgMass*rrpr/rij1sq;
+            float4 dr = rij1*delta;
+            xpi.xyz += dr.xyz*invMassCentral;
+            xpj1.xyz -= dr.xyz*invMassPeripheral;
+            if (fabs(delta) > tol)
+                converged = false;
+            if (atoms.z != -1) {
+                rpij = xpi-xpj2;
+                rrpr = rpij.x*rij2.x + rpij.y*rij2.y + rpij.z*rij2.z;
+                delta = -2.0f*avgMass*rrpr/rij2sq;
+                dr = rij2*delta;
+                xpi.xyz += dr.xyz*invMassCentral;
+                xpj2.xyz -= dr.xyz*invMassPeripheral;
+                if (fabs(delta) > tol)
+                    converged = false;
+            }
+            if (atoms.w != -1) {
+                rpij = xpi-xpj3;
+                rrpr = rpij.x*rij3.x + rpij.y*rij3.y + rpij.z*rij3.z;
+                delta = -2.0f*avgMass*rrpr/rij3sq;
+                dr = rij3*delta;
+                xpi.xyz += dr.xyz*invMassCentral;
+                xpj3.xyz -= dr.xyz*invMassPeripheral;
+                if (fabs(delta) > tol)
+                    converged = false;
+            }
+#else
            float4 rpij = xpi-xpj1;
            float rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z;
            float rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z;
@@ -85,17 +115,18 @@ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const f
                    converged = false;
                }
            }
+#endif
            iteration++;
        }

        // Record the new positions.

-        newDelta[atoms.x] = xpi;
-        newDelta[atoms.y] = xpj1;
+        posDelta[atoms.x] = xpi;
+        posDelta[atoms.y] = xpj1;
        if (atoms.z != -1)
-            newDelta[atoms.z] = xpj2;
+            posDelta[atoms.z] = xpj2;
        if (atoms.w != -1)
-            newDelta[atoms.w] = xpj3;
+            posDelta[atoms.w] = xpj3;
        index += get_global_size(0);
    }
 }
--- a/platforms/opencl/tests/TestOpenCLCustomIntegrator.cpp
+++ b/platforms/opencl/tests/TestOpenCLCustomIntegrator.cpp
@@ -155,8 +155,7 @@ void testConstraints() {
 * Test an integrator that applies constraints directly to velocities.
 */
 void testVelocityConstraints() {
-    const int numParticles = 8;
-    const double temp = 500.0;
+    const int numParticles = 10;
    OpenCLPlatform platform;
    System system;
    CustomIntegrator integrator(0.002);
@@ -171,7 +170,21 @@ void testVelocityConstraints() {
        system.addParticle(i%2 == 0 ? 5.0 : 10.0);
        forceField->addParticle((i%2 == 0 ? 0.2 : -0.2), 0.5, 5.0);
    }
-    for (int i = 0; i < numParticles-1; ++i)
+    
+    // Constrain the first three particles with SHAKE.
+    
+    system.addConstraint(0, 1, 1.0);
+    system.addConstraint(1, 2, 1.0);
+    
+    // Constrain the next three with SETTLE.
+    
+    system.addConstraint(3, 4, 1.0);
+    system.addConstraint(5, 4, 1.0);
+    system.addConstraint(3, 5, sqrt(2.0));
+    
+    // Constraint the rest with CCMA.
+    
+    for (int i = 6; i < numParticles-1; ++i)
        system.addConstraint(i, i+1, 1.0);
    system.addForce(forceField);
    Context context(system, integrator, platform);
@@ -191,7 +204,8 @@ void testVelocityConstraints() {
    
    double initialEnergy = 0.0;
    for (int i = 0; i < 1000; ++i) {
-        State state = context.getState(State::Positions | State::Energy);
+        integrator.step(2);
+        State state = context.getState(State::Positions | State::Velocities | State::Energy);
        for (int j = 0; j < system.getNumConstraints(); ++j) {
            int particle1, particle2;
            double distance;
@@ -200,13 +214,18 @@ void testVelocityConstraints() {
            Vec3 p2 = state.getPositions()[particle2];
            double dist = std::sqrt((p1[0]-p2[0])*(p1[0]-p2[0])+(p1[1]-p2[1])*(p1[1]-p2[1])+(p1[2]-p2[2])*(p1[2]-p2[2]));
            ASSERT_EQUAL_TOL(distance, dist, 2e-5);
+            if (i > 0) {
+                Vec3 v1 = state.getVelocities()[particle1];
+                Vec3 v2 = state.getVelocities()[particle2];
+                double vel = (v1-v2).dot(p1-p2);
+                ASSERT_EQUAL_TOL(0.0, vel, 2e-5);
+            }
        }
        double energy = state.getKineticEnergy()+state.getPotentialEnergy();
-        if (i == 1)
+        if (i == 0)
            initialEnergy = energy;
-        else if (i > 1)
+        else if (i > 0)
            ASSERT_EQUAL_TOL(initialEnergy, energy, 0.05);
-        integrator.step(2);
    }
 }

@@ -383,7 +402,7 @@ int main() {
    try {
       testSingleBond();
        testConstraints();
-//        testVelocityConstraints();
+        testVelocityConstraints();
        testWithThermostat();
        testMonteCarlo();
        testSum();