Created OpenCL implementation of DrudeSCFIntegrator

16bd6601 · peastman · c78efcbd · 16bd6601 · 16bd6601 · 16bd6601
Commit 16bd6601 authored May 24, 2013 by peastman
6 changed files
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
@@ -334,6 +334,8 @@ private:
 };

 CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() {
+    if (minimizerPos != NULL)
+        lbfgs_free(minimizerPos);
 }

 void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {

--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
@@ -122,7 +122,7 @@ private:
 class CudaIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
 public:
    CudaIntegrateDrudeSCFStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
-            IntegrateDrudeSCFStepKernel(name, platform), cu(cu) {
+            IntegrateDrudeSCFStepKernel(name, platform), cu(cu), minimizerPos(NULL) {
    }
    ~CudaIntegrateDrudeSCFStepKernel();
    /**

--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernelFactory.cpp
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernelFactory.cpp
@@ -43,6 +43,7 @@ extern "C" void registerKernelFactories() {
        OpenCLDrudeKernelFactory* factory = new OpenCLDrudeKernelFactory();
        platform.registerKernelFactory(CalcDrudeForceKernel::Name(), factory);
        platform.registerKernelFactory(IntegrateDrudeLangevinStepKernel::Name(), factory);
+        platform.registerKernelFactory(IntegrateDrudeSCFStepKernel::Name(), factory);
    }
    catch (std::exception ex) {
        // Ignore
@@ -65,5 +66,7 @@ KernelImpl* OpenCLDrudeKernelFactory::createKernelImpl(std::string name, const P
        return new OpenCLCalcDrudeForceKernel(name, platform, cl);
    if (name == IntegrateDrudeLangevinStepKernel::Name())
        return new OpenCLIntegrateDrudeLangevinStepKernel(name, platform, cl);
+    if (name == IntegrateDrudeSCFStepKernel::Name())
+        return new OpenCLIntegrateDrudeSCFStepKernel(name, platform, cl);
    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
 }
--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
@@ -42,6 +42,13 @@
 using namespace OpenMM;
 using namespace std;

+static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
+    if (cl.getUseMixedPrecision())
+        kernel.setArg<cl::Buffer>(index, cl.getPosqCorrection().getDeviceBuffer());
+    else
+        kernel.setArg<void*>(index, NULL);
+}
+
 class OpenCLDrudeForceInfo : public OpenCLForceInfo {
 public:
    OpenCLDrudeForceInfo(const DrudeForce& force) : OpenCLForceInfo(0), force(force) {
@@ -310,3 +317,235 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
 double OpenCLIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
    return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
 }
+
+class OpenCLIntegrateDrudeSCFStepKernel::ReorderListener : public OpenCLContext::ReorderListener {
+public:
+    ReorderListener(OpenCLContext& cl, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
+            cl(cl), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
+    }
+    void execute() {
+        const vector<int>& order = cl.getAtomIndex();
+        int numParticles = order.size();
+        vector<int> inverseOrder(numParticles);
+        for (int i = 0; i < numParticles; i++)
+            inverseOrder[order[i]] = i;
+        int numDrudeParticles = drudeParticles.size();
+        for (int i = 0; i < numDrudeParticles; i++)
+            reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
+    }
+private:
+    OpenCLContext& cl;
+    const vector<int>& drudeParticles;
+    vector<int>& reorderedDrudeParticles;
+};
+
+OpenCLIntegrateDrudeSCFStepKernel::~OpenCLIntegrateDrudeSCFStepKernel() {
+    if (minimizerPos != NULL)
+        lbfgs_free(minimizerPos);
+}
+
+void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
+    cl.getPlatformData().initializeContexts(system);
+
+    // Identify Drude particles.
+    
+    for (int i = 0; i < force.getNumParticles(); i++) {
+        int p, p1, p2, p3, p4;
+        double charge, polarizability, aniso12, aniso34;
+        force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
+        drudeParticles.push_back(p);
+        reorderedDrudeParticles.push_back(p);
+    }
+    cl.addReorderListener(new ReorderListener(cl, drudeParticles, reorderedDrudeParticles));
+    
+    // Initialize the energy minimizer.
+    
+    minimizerPos = lbfgs_malloc(drudeParticles.size()*3);
+    if (minimizerPos == NULL)
+        throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
+    lbfgs_parameter_init(&minimizerParams);
+    minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;    
+
+    // Create the kernels.
+    
+    cl::Program program = cl.createProgram(OpenCLKernelSources::verlet, "");
+    kernel1 = cl::Kernel(program, "integrateVerletPart1");
+    kernel2 = cl::Kernel(program, "integrateVerletPart2");
+    prevStepSize = -1.0;
+}
+
+void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
+    int numAtoms = cl.getNumAtoms();
+    double dt = integrator.getStepSize();
+    if (!hasInitializedKernels) {
+        hasInitializedKernels = true;
+        kernel1.setArg<cl_int>(0, numAtoms);
+        kernel1.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
+        setPosqCorrectionArg(cl, kernel1, 3);
+        kernel1.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(5, cl.getForce().getDeviceBuffer());
+        kernel1.setArg<cl::Buffer>(6, integration.getPosDelta().getDeviceBuffer());
+        kernel2.setArg<cl_int>(0, numAtoms);
+        kernel2.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
+        setPosqCorrectionArg(cl, kernel2, 3);
+        kernel2.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
+        kernel2.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
+    }
+    if (dt != prevStepSize) {
+        if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
+            vector<mm_double2> stepSizeVec(1);
+            stepSizeVec[0] = mm_double2(dt, dt);
+            cl.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
+        }
+        else {
+            vector<mm_float2> stepSizeVec(1);
+            stepSizeVec[0] = mm_float2((cl_float) dt, (cl_float) dt);
+            cl.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
+        }
+        prevStepSize = dt;
+    }
+
+    // Call the first integration kernel.
+
+    cl.executeKernel(kernel1, numAtoms);
+
+    // Apply constraints.
+
+    integration.applyConstraints(integrator.getConstraintTolerance());
+
+    // Call the second integration kernel.
+
+    cl.executeKernel(kernel2, numAtoms);
+
+    // Update the positions of virtual sites and Drude particles.
+
+    integration.computeVirtualSites();
+    minimize(context, integrator.getMinimizationErrorTolerance());
+
+    // Update the time and step count.
+
+    cl.setTime(cl.getTime()+dt);
+    cl.setStepCount(cl.getStepCount()+1);
+    
+    // Reduce UI lag.
+    
+#ifdef WIN32
+    cl.getQueue().flush();
+#endif
+}
+
+double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
+    return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
+}
+
+struct MinimizerData {
+    ContextImpl& context;
+    OpenCLContext& cl;
+    vector<int>& reorderedDrudeParticles;
+    MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& reorderedDrudeParticles) : context(context), cl(cl), reorderedDrudeParticles(reorderedDrudeParticles) {}
+};
+
+static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
+    MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
+    ContextImpl& context = data->context;
+    OpenCLContext& cl = data->cl;
+    vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles;
+    int numDrudeParticles = reorderedDrudeParticles.size();
+
+    // Set the particle positions.
+    
+    cl.getPosq().download(cl.getPinnedBuffer());
+    mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
+    if (cl.getUseDoublePrecision()) {
+        mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            mm_double4& p = posq[reorderedDrudeParticles[i]];
+            mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]];
+            p.x = x[3*i]+offset.x*periodicBoxSize.x;
+            p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
+            p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
+        }
+    }
+    else {
+        mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            mm_float4& p = posq[reorderedDrudeParticles[i]];
+            mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]];
+            p.x = x[3*i]+offset.x*periodicBoxSize.x;
+            p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
+            p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
+        }
+    }
+    cl.getPosq().upload(cl.getPinnedBuffer());
+
+    // Compute the forces and energy for this configuration.
+
+    double energy = context.calcForcesAndEnergy(true, true);
+    cl.getForce().download(cl.getPinnedBuffer());
+    if (cl.getUseDoublePrecision()) {
+        mm_double4* force = (mm_double4*) cl.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            int index = reorderedDrudeParticles[i];
+            g[3*i] = -force[index].x;
+            g[3*i+1] = -force[index].y;
+            g[3*i+2] = -force[index].z;
+        }
+    }
+    else {
+        mm_float4* force = (mm_float4*) cl.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            int index = reorderedDrudeParticles[i];
+            g[3*i] = -force[index].x;
+            g[3*i+1] = -force[index].y;
+            g[3*i+2] = -force[index].z;
+        }
+    }
+    return energy;
+}
+
+void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
+    // Record the initial positions.
+
+    int numDrudeParticles = reorderedDrudeParticles.size();
+    cl.getPosq().download(cl.getPinnedBuffer());
+    mm_double4 periodicBoxSize = cl.getPeriodicBoxSizeDouble();
+    if (cl.getUseDoublePrecision()) {
+        mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            mm_double4 p = posq[reorderedDrudeParticles[i]];
+            mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]];
+            minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
+            minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
+            minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
+        }
+    }
+    else {
+        mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            mm_float4 p = posq[reorderedDrudeParticles[i]];
+            mm_int4 offset = cl.getPosCellOffsets()[reorderedDrudeParticles[i]];
+            minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
+            minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
+            minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
+        }
+        minimizerParams.xtol = 1e-7;
+    }
+    
+    // Determine a normalization constant for scaling the tolerance.
+    
+    double norm = 0.0;
+    for (int i = 0; i < 3*numDrudeParticles; i++)
+        norm += minimizerPos[i]*minimizerPos[i];
+    norm /= numDrudeParticles;
+    norm = (norm < 1 ? 1 : sqrt(norm));
+    minimizerParams.epsilon = tolerance/norm;
+    
+    // Perform the minimization.
+
+    lbfgsfloatval_t fx;
+    MinimizerData data(context, cl, reorderedDrudeParticles);
+    lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
+}
\ No newline at end of file
--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
@@ -35,6 +35,7 @@
 #include "openmm/DrudeKernels.h"
 #include "OpenCLContext.h"
 #include "OpenCLArray.h"
+#include "lbfgs.h"

 namespace OpenMM {

@@ -116,6 +117,50 @@ private:
    cl::Kernel kernel1, kernel2;
 };

+/**
+ * This kernel is invoked by DrudeSCFIntegrator to take one time step
+ */
+class OpenCLIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
+public:
+    OpenCLIntegrateDrudeSCFStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
+            IntegrateDrudeSCFStepKernel(name, platform), cl(cl), hasInitializedKernels(false), minimizerPos(NULL) {
+    }
+    ~OpenCLIntegrateDrudeSCFStepKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param integrator the DrudeSCFIntegrator this kernel will be used for
+     * @param force      the DrudeForce to get particle parameters from
+     */
+    void initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force);
+    /**
+     * Execute the kernel.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param integrator     the DrudeSCFIntegrator this kernel is being used for
+     */
+    void execute(ContextImpl& context, const DrudeSCFIntegrator& integrator);
+    /**
+     * Compute the kinetic energy.
+     * 
+     * @param context     the context in which to execute this kernel
+     * @param integrator  the DrudeSCFIntegrator this kernel is being used for
+     */
+    double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
+private:
+    class ReorderListener;
+    void minimize(ContextImpl& context, double tolerance);
+    OpenCLContext& cl;
+    bool hasInitializedKernels;
+    double prevStepSize;
+    std::vector<int> drudeParticles;
+    std::vector<int> reorderedDrudeParticles;
+    lbfgsfloatval_t *minimizerPos;
+    lbfgs_parameter_t minimizerParams;
+    cl::Kernel kernel1, kernel2;
+};
+
 } // namespace OpenMM

 #endif /*OPENCL_DRUDE_KERNELS_H_*/
--- a/plugins/drude/platforms/opencl/tests/TestOpenCLDrudeSCFIntegrator.cpp
+++ b/plugins/drude/platforms/opencl/tests/TestOpenCLDrudeSCFIntegrator.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+/**
+ * This tests the OpenCL implementation of DrudeSCFIntegrator.
+ */
+
+#include "openmm/internal/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/Platform.h"
+#include "openmm/System.h"
+#include "openmm/VirtualSite.h"
+#include "openmm/DrudeForce.h"
+#include "openmm/DrudeSCFIntegrator.h"
+#include "SimTKUtilities/SimTKOpenMMUtilities.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+extern "C" OPENMM_EXPORT void registerDrudeOpenCLKernelFactories();
+
+void testWater() {
+    // Create a box of SWM4-NDP water molecules.  This involves constraints, virtual sites,
+    // and Drude particles.
+    
+    const int gridSize = 4;
+    const int numMolecules = gridSize*gridSize*gridSize;
+    const double spacing = 0.6;
+    const double boxSize = spacing*(gridSize+1);
+    System system;
+    NonbondedForce* nonbonded = new NonbondedForce();
+    DrudeForce* drude = new DrudeForce();
+    system.addForce(nonbonded);
+    system.addForce(drude);
+    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
+    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
+    nonbonded->setCutoffDistance(1.0);
+    for (int i = 0; i < numMolecules; i++) {
+        int startIndex = system.getNumParticles();
+        system.addParticle(15.6); // O
+        system.addParticle(0.4);  // D
+        system.addParticle(1.0);  // H1
+        system.addParticle(1.0);  // H2
+        system.addParticle(0.0);  // M
+        nonbonded->addParticle(1.71636, 0.318395, 0.21094*4.184);
+        nonbonded->addParticle(-1.71636, 1, 0);
+        nonbonded->addParticle(0.55733, 1, 0);
+        nonbonded->addParticle(0.55733, 1, 0);
+        nonbonded->addParticle(-1.11466, 1, 0);
+        for (int j = 0; j < 5; j++)
+            for (int k = 0; k < j; k++)
+                nonbonded->addException(startIndex+j, startIndex+k, 0, 1, 0);
+        system.addConstraint(startIndex, startIndex+2, 0.09572);
+        system.addConstraint(startIndex, startIndex+3, 0.09572);
+        system.addConstraint(startIndex+2, startIndex+3, 0.15139);
+        system.setVirtualSite(startIndex+4, new ThreeParticleAverageSite(startIndex, startIndex+2, startIndex+3, 0.786646558, 0.106676721, 0.106676721));
+        drude->addParticle(startIndex+1, startIndex, -1, -1, -1, -1.71636, 1.71636*1.71636/(100000*4.184), 1, 1);
+    }
+    vector<Vec3> positions;
+    for (int i = 0; i < gridSize; i++)
+        for (int j = 0; j < gridSize; j++)
+            for (int k = 0; k < gridSize; k++) {
+                Vec3 pos(i*spacing, j*spacing, k*spacing);
+                positions.push_back(pos);
+                positions.push_back(pos);
+                positions.push_back(pos+Vec3(0.09572, 0, 0));
+                positions.push_back(pos+Vec3(-0.023999, 0.092663, 0));
+                positions.push_back(pos);
+            }
+    
+    // Simulate it and check energy conservation and the total force on the Drude particles.
+    
+    DrudeSCFIntegrator integ(0.0005);
+    Platform& platform = Platform::getPlatformByName("OpenCL");
+    Context context(system, integ, platform);
+    context.setPositions(positions);
+    context.applyConstraints(1e-5);
+    context.setVelocitiesToTemperature(300.0);
+    State state = context.getState(State::Energy);
+    double initialEnergy;
+    int numSteps = 1000;
+    double maxNorm = (platform.getPropertyValue(context, "OpenCLPrecision") == "double" ? 1.0 : 5.0);
+    for (int i = 0; i < numSteps; i++) {
+        integ.step(1);
+        state = context.getState(State::Energy | State::Forces);
+        if (i == 0)
+            initialEnergy = state.getPotentialEnergy()+state.getKineticEnergy();
+        else
+            ASSERT_EQUAL_TOL(initialEnergy, state.getPotentialEnergy()+state.getKineticEnergy(), 0.01);
+        const vector<Vec3>& force = state.getForces();
+        double norm = 0.0;
+        for (int j = 1; j < (int) force.size(); j += 5)
+            norm += sqrt(force[j].dot(force[j]));
+        norm = (norm/numMolecules);
+        ASSERT(norm < maxNorm);
+    }
+}
+
+int main(int argc, char* argv[]) {
+    try {
+        registerDrudeOpenCLKernelFactories();
+        if (argc > 1)
+            Platform::getPlatformByName("OpenCL").setPropertyDefaultValue("OpenCLPrecision", string(argv[1]));
+        testWater();
+    }
+    catch(const std::exception& e) {
+        std::cout << "exception: " << e.what() << std::endl;
+        std::cout << "FAIL - ERROR.  Test failed." << std::endl;
+        return 1;
+    }
+    std::cout << "Done" << std::endl;
+    return 0;
+}