Created CUDA implementation of DrudeSCFIntegrator

8f5c9840 · peastman · 7cbb8a01 · 8f5c9840 · 8f5c9840 · 8f5c9840
Commit 8f5c9840 authored May 23, 2013 by peastman
6 changed files
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
@@ -43,6 +43,7 @@ extern "C" void registerKernelFactories() {
        CudaDrudeKernelFactory* factory = new CudaDrudeKernelFactory();
        platform.registerKernelFactory(CalcDrudeForceKernel::Name(), factory);
        platform.registerKernelFactory(IntegrateDrudeLangevinStepKernel::Name(), factory);
+        platform.registerKernelFactory(IntegrateDrudeSCFStepKernel::Name(), factory);
    }
    catch (std::exception ex) {
        // Ignore
@@ -65,5 +66,7 @@ KernelImpl* CudaDrudeKernelFactory::createKernelImpl(std::string name, const Pla
        return new CudaCalcDrudeForceKernel(name, platform, cu);
    if (name == IntegrateDrudeLangevinStepKernel::Name())
        return new CudaIntegrateDrudeLangevinStepKernel(name, platform, cu);
+    if (name == IntegrateDrudeSCFStepKernel::Name())
+        return new CudaIntegrateDrudeSCFStepKernel(name, platform, cu);
    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
 }
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
@@ -311,3 +311,212 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
 double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
    return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
 }
+
+class CudaIntegrateDrudeSCFStepKernel::ReorderListener : public CudaContext::ReorderListener {
+public:
+    ReorderListener(CudaContext& cu, const vector<int>& drudeParticles, vector<int>& reorderedDrudeParticles) :
+            cu(cu), drudeParticles(drudeParticles), reorderedDrudeParticles(reorderedDrudeParticles) {
+    }
+    void execute() {
+        const vector<int>& order = cu.getAtomIndex();
+        int numParticles = order.size();
+        vector<int> inverseOrder(numParticles);
+        for (int i = 0; i < numParticles; i++)
+            inverseOrder[order[i]] = i;
+        int numDrudeParticles = drudeParticles.size();
+        for (int i = 0; i < numDrudeParticles; i++)
+            reorderedDrudeParticles[i] = inverseOrder[drudeParticles[i]];
+    }
+private:
+    CudaContext& cu;
+    const vector<int>& drudeParticles;
+    vector<int>& reorderedDrudeParticles;
+};
+
+CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() {
+}
+
+void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
+    cu.getPlatformData().initializeContexts(system);
+    cu.setAsCurrent();
+
+    // Identify Drude particles.
+    
+    for (int i = 0; i < force.getNumParticles(); i++) {
+        int p, p1, p2, p3, p4;
+        double charge, polarizability, aniso12, aniso34;
+        force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
+        drudeParticles.push_back(p);
+        reorderedDrudeParticles.push_back(p);
+    }
+    cu.addReorderListener(new ReorderListener(cu, drudeParticles, reorderedDrudeParticles));
+    
+    // Initialize the energy minimizer.
+    
+    minimizerPos = lbfgs_malloc(drudeParticles.size()*3);
+    if (minimizerPos == NULL)
+        throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
+    lbfgs_parameter_init(&minimizerParams);
+    minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;    
+
+    // Create the kernels.
+    
+    map<string, string> defines;
+    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
+    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
+    CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, "");
+    kernel1 = cu.getKernel(module, "integrateVerletPart1");
+    kernel2 = cu.getKernel(module, "integrateVerletPart2");
+    prevStepSize = -1.0;
+}
+
+void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
+    cu.setAsCurrent();
+    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
+    int numAtoms = cu.getNumAtoms();
+    double dt = integrator.getStepSize();
+    if (dt != prevStepSize) {
+        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
+            vector<double2> stepSizeVec(1);
+            stepSizeVec[0] = make_double2(dt, dt);
+            cu.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
+        }
+        else {
+            vector<float2> stepSizeVec(1);
+            stepSizeVec[0] = make_float2((float) dt, (float) dt);
+            cu.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
+        }
+        prevStepSize = dt;
+    }
+
+    // Call the first integration kernel.
+
+    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
+    void* args1[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
+            &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
+    cu.executeKernel(kernel1, args1, numAtoms);
+
+    // Apply constraints.
+
+    integration.applyConstraints(integrator.getConstraintTolerance());
+
+    // Call the second integration kernel.
+
+    void* args2[] = {&cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
+            &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
+    cu.executeKernel(kernel2, args2, numAtoms);
+
+    // Update the positions of virtual sites and Drude particles.
+
+    integration.computeVirtualSites();
+    minimize(context, integrator.getMinimizationErrorTolerance());
+
+    // Update the time and step count.
+
+    cu.setTime(cu.getTime()+dt);
+    cu.setStepCount(cu.getStepCount()+1);
+}
+
+double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
+    return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
+}
+
+struct MinimizerData {
+    ContextImpl& context;
+    CudaContext& cu;
+    vector<int>& reorderedDrudeParticles;
+    MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& reorderedDrudeParticles) : context(context), cu(cu), reorderedDrudeParticles(reorderedDrudeParticles) {}
+};
+
+static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
+    MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
+    ContextImpl& context = data->context;
+    CudaContext& cu = data->cu;
+    vector<int>& reorderedDrudeParticles = data->reorderedDrudeParticles;
+    int numDrudeParticles = reorderedDrudeParticles.size();
+
+    // Set the particle positions.
+    
+    cu.getPosq().download(cu.getPinnedBuffer());
+    double4 periodicBoxSize = cu.getPeriodicBoxSize();
+    if (cu.getUseDoublePrecision()) {
+        double4* posq = (double4*) cu.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            double4& p = posq[reorderedDrudeParticles[i]];
+            int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
+            p.x = x[3*i]+offset.x*periodicBoxSize.x;
+            p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
+            p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
+        }
+    }
+    else {
+        float4* posq = (float4*) cu.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            float4& p = posq[reorderedDrudeParticles[i]];
+            int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
+            p.x = x[3*i]+offset.x*periodicBoxSize.x;
+            p.y = x[3*i+1]+offset.y*periodicBoxSize.y;
+            p.z = x[3*i+2]+offset.z*periodicBoxSize.z;
+        }
+    }
+    cu.getPosq().upload(cu.getPinnedBuffer());
+
+    // Compute the forces and energy for this configuration.
+
+    double energy = context.calcForcesAndEnergy(true, true);
+    long long* force = (long long*) cu.getPinnedBuffer();
+    cu.getForce().download(force);
+    double forceScale = -1.0/0x100000000;
+    int paddedNumAtoms = cu.getPaddedNumAtoms();
+    for (int i = 0; i < numDrudeParticles; ++i) {
+        int index = reorderedDrudeParticles[i];
+        g[3*i] = forceScale*force[index];
+        g[3*i+1] = forceScale*force[index+paddedNumAtoms];
+        g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
+    }
+    return energy;
+}
+
+void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
+    // Record the initial positions.
+
+    int numDrudeParticles = reorderedDrudeParticles.size();
+    cu.getPosq().download(cu.getPinnedBuffer());
+    double4 periodicBoxSize = cu.getPeriodicBoxSize();
+    if (cu.getUseDoublePrecision()) {
+        double4* posq = (double4*) cu.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            double4 p = posq[reorderedDrudeParticles[i]];
+            int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
+            minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
+            minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
+            minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
+        }
+    }
+    else {
+        float4* posq = (float4*) cu.getPinnedBuffer();
+        for (int i = 0; i < numDrudeParticles; ++i) {
+            float4 p = posq[reorderedDrudeParticles[i]];
+            int4 offset = cu.getPosCellOffsets()[reorderedDrudeParticles[i]];
+            minimizerPos[3*i] = p.x-offset.x*periodicBoxSize.x;
+            minimizerPos[3*i+1] = p.y-offset.y*periodicBoxSize.y;
+            minimizerPos[3*i+2] = p.z-offset.z*periodicBoxSize.z;
+        }
+        minimizerParams.xtol = 1e-7;
+    }
+    
+    // Determine a normalization constant for scaling the tolerance.
+    
+    double norm = 0.0;
+    for (int i = 0; i < 3*numDrudeParticles; i++)
+        norm += minimizerPos[i]*minimizerPos[i];
+    norm /= numDrudeParticles;
+    norm = (norm < 1 ? 1 : sqrt(norm));
+    minimizerParams.epsilon = tolerance/norm;
+    
+    // Perform the minimization.
+
+    lbfgsfloatval_t fx;
+    MinimizerData data(context, cu, reorderedDrudeParticles);
+    lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
+}
\ No newline at end of file
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
@@ -35,6 +35,7 @@
 #include "openmm/DrudeKernels.h"
 #include "CudaContext.h"
 #include "CudaArray.h"
+#include "lbfgs.h"

 namespace OpenMM {

@@ -115,6 +116,49 @@ private:
    CUfunction kernel1, kernel2;
 };

+/**
+ * This kernel is invoked by DrudeSCFIntegrator to take one time step
+ */
+class CudaIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
+public:
+    CudaIntegrateDrudeSCFStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
+            IntegrateDrudeSCFStepKernel(name, platform), cu(cu) {
+    }
+    ~CudaIntegrateDrudeSCFStepKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param integrator the DrudeSCFIntegrator this kernel will be used for
+     * @param force      the DrudeForce to get particle parameters from
+     */
+    void initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force);
+    /**
+     * Execute the kernel.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param integrator     the DrudeSCFIntegrator this kernel is being used for
+     */
+    void execute(ContextImpl& context, const DrudeSCFIntegrator& integrator);
+    /**
+     * Compute the kinetic energy.
+     * 
+     * @param context     the context in which to execute this kernel
+     * @param integrator  the DrudeSCFIntegrator this kernel is being used for
+     */
+    double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
+private:
+    class ReorderListener;
+    void minimize(ContextImpl& context, double tolerance);
+    CudaContext& cu;
+    double prevStepSize;
+    std::vector<int> drudeParticles;
+    std::vector<int> reorderedDrudeParticles;
+    lbfgsfloatval_t *minimizerPos;
+    lbfgs_parameter_t minimizerParams;
+    CUfunction kernel1, kernel2;
+};
+
 } // namespace OpenMM

 #endif /*CUDA_DRUDE_KERNELS_H_*/
--- a/plugins/drude/platforms/cuda/tests/TestCudaDrudeSCFIntegrator.cpp
+++ b/plugins/drude/platforms/cuda/tests/TestCudaDrudeSCFIntegrator.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+/**
+ * This tests the CUDA implementation of DrudeSCFIntegrator.
+ */
+
+#include "openmm/internal/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/Platform.h"
+#include "openmm/System.h"
+#include "openmm/VirtualSite.h"
+#include "openmm/DrudeForce.h"
+#include "openmm/DrudeSCFIntegrator.h"
+#include "SimTKUtilities/SimTKOpenMMUtilities.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+extern "C" OPENMM_EXPORT void registerDrudeCudaKernelFactories();
+
+void testWater() {
+    // Create a box of SWM4-NDP water molecules.  This involves constraints, virtual sites,
+    // and Drude particles.
+    
+    const int gridSize = 4;
+    const int numMolecules = gridSize*gridSize*gridSize;
+    const double spacing = 0.6;
+    const double boxSize = spacing*(gridSize+1);
+    System system;
+    NonbondedForce* nonbonded = new NonbondedForce();
+    DrudeForce* drude = new DrudeForce();
+    system.addForce(nonbonded);
+    system.addForce(drude);
+    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
+    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
+    nonbonded->setCutoffDistance(1.0);
+    for (int i = 0; i < numMolecules; i++) {
+        int startIndex = system.getNumParticles();
+        system.addParticle(15.6); // O
+        system.addParticle(0.4);  // D
+        system.addParticle(1.0);  // H1
+        system.addParticle(1.0);  // H2
+        system.addParticle(0.0);  // M
+        nonbonded->addParticle(1.71636, 0.318395, 0.21094*4.184);
+        nonbonded->addParticle(-1.71636, 1, 0);
+        nonbonded->addParticle(0.55733, 1, 0);
+        nonbonded->addParticle(0.55733, 1, 0);
+        nonbonded->addParticle(-1.11466, 1, 0);
+        for (int j = 0; j < 5; j++)
+            for (int k = 0; k < j; k++)
+                nonbonded->addException(startIndex+j, startIndex+k, 0, 1, 0);
+        system.addConstraint(startIndex, startIndex+2, 0.09572);
+        system.addConstraint(startIndex, startIndex+3, 0.09572);
+        system.addConstraint(startIndex+2, startIndex+3, 0.15139);
+        system.setVirtualSite(startIndex+4, new ThreeParticleAverageSite(startIndex, startIndex+2, startIndex+3, 0.786646558, 0.106676721, 0.106676721));
+        drude->addParticle(startIndex+1, startIndex, -1, -1, -1, -1.71636, 1.71636*1.71636/(100000*4.184), 1, 1);
+    }
+    vector<Vec3> positions;
+    for (int i = 0; i < gridSize; i++)
+        for (int j = 0; j < gridSize; j++)
+            for (int k = 0; k < gridSize; k++) {
+                Vec3 pos(i*spacing, j*spacing, k*spacing);
+                positions.push_back(pos);
+                positions.push_back(pos);
+                positions.push_back(pos+Vec3(0.09572, 0, 0));
+                positions.push_back(pos+Vec3(-0.023999, 0.092663, 0));
+                positions.push_back(pos);
+            }
+    
+    // Simulate it and check energy conservation and the total force on the Drude particles.
+    
+    DrudeSCFIntegrator integ(0.0005);
+    Platform& platform = Platform::getPlatformByName("CUDA");
+    Context context(system, integ, platform);
+    context.setPositions(positions);
+    context.applyConstraints(1e-5);
+    context.setVelocitiesToTemperature(300.0);
+    State state = context.getState(State::Energy);
+    double initialEnergy;
+    int numSteps = 1000;
+    double maxNorm = (platform.getPropertyValue(context, "CudaPrecision") == "double" ? 1.0 : 5.0);
+    for (int i = 0; i < numSteps; i++) {
+        integ.step(1);
+        state = context.getState(State::Energy | State::Forces);
+        if (i == 0)
+            initialEnergy = state.getPotentialEnergy()+state.getKineticEnergy();
+        else
+            ASSERT_EQUAL_TOL(initialEnergy, state.getPotentialEnergy()+state.getKineticEnergy(), 0.01);
+        const vector<Vec3>& force = state.getForces();
+        double norm = 0.0;
+        for (int j = 1; j < (int) force.size(); j += 5)
+            norm += sqrt(force[j].dot(force[j]));
+        norm = (norm/numMolecules);
+        ASSERT(norm < maxNorm);
+    }
+}
+
+int main(int argc, char* argv[]) {
+    try {
+        registerDrudeCudaKernelFactories();
+        if (argc > 1)
+            Platform::getPlatformByName("CUDA").setPropertyDefaultValue("CudaPrecision", string(argv[1]));
+        testWater();
+    }
+    catch(const std::exception& e) {
+        std::cout << "exception: " << e.what() << std::endl;
+        std::cout << "FAIL - ERROR.  Test failed." << std::endl;
+        return 1;
+    }
+    std::cout << "Done" << std::endl;
+    return 0;
+}
--- a/plugins/drude/platforms/reference/src/ReferenceDrudeKernels.cpp
+++ b/plugins/drude/platforms/reference/src/ReferenceDrudeKernels.cpp
@@ -418,8 +418,9 @@ void ReferenceIntegrateDrudeSCFStepKernel::initialize(const System& system, cons
    if (minimizerPos == NULL)
        throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
    lbfgs_parameter_init(&minimizerParams);
-    minimizerParams.max_iterations = 0;
    minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
+    if (sizeof(RealOpenMM) < 8)
+        minimizerParams.xtol = 1e-7;
 }

 void ReferenceIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {

--- a/plugins/drude/platforms/reference/tests/TestReferenceDrudeSCFIntegrator.cpp
+++ b/plugins/drude/platforms/reference/tests/TestReferenceDrudeSCFIntegrator.cpp
@@ -106,12 +106,15 @@ void testWater() {
    context.applyConstraints(1e-5);
    context.setVelocitiesToTemperature(300.0);
    State state = context.getState(State::Energy);
-    double initialEnergy = state.getPotentialEnergy()+state.getKineticEnergy();
+    double initialEnergy;
    int numSteps = 1000;
    for (int i = 0; i < numSteps; i++) {
        integ.step(1);
        state = context.getState(State::Energy | State::Forces);
-        ASSERT_EQUAL_TOL(initialEnergy, state.getPotentialEnergy()+state.getKineticEnergy(), 0.01);
+        if (i == 0)
+            initialEnergy = state.getPotentialEnergy()+state.getKineticEnergy();
+        else
+            ASSERT_EQUAL_TOL(initialEnergy, state.getPotentialEnergy()+state.getKineticEnergy(), 0.01);
        const vector<Vec3>& force = state.getForces();
        double norm = 0.0;
        for (int j = 1; j < (int) force.size(); j += 5)