Merge https://github.com/openmm/openmm

5a06df78 · tic20 · 8dd60914 · a9223eea · 5a06df78 · 5a06df78
Commit 5a06df78 authored Mar 04, 2020 by tic20
20 changed files
--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernelSources.h.in
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernelSources.h.in
-#ifndef OPENMM_OPENCLDRUDEKERNELSOURCES_H_
-#define OPENMM_OPENCLDRUDEKERNELSOURCES_H_
+#ifndef OPENMM_COMMONDRUDEKERNELSOURCES_H_
+#define OPENMM_COMMONDRUDEKERNELSOURCES_H_

 /* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
@@ -32,16 +32,16 @@
 namespace OpenMM {

 /**
- * This class is a central holding place for the source code of OpenCL kernels.
- * The CMake build script inserts declarations into it based on the .cu files in the
+ * This class is a central holding place for the source code of device kernels.
+ * The CMake build script inserts declarations into it based on the .cc files in the
 * kernels subfolder.
 */

-class OpenCLDrudeKernelSources {
+class CommonDrudeKernelSources {
 public:
-@CL_FILE_DECLARATIONS@
+@KERNEL_FILE_DECLARATIONS@
 };

 } // namespace OpenMM

-#endif /*OPENMM_OPENCLDRUDEKERNELSOURCES_H_*/
+#endif /*OPENMM_COMMONDRUDEKERNELSOURCES_H_*/
--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2019 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -29,29 +29,22 @@
 * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
 * -------------------------------------------------------------------------- */

-#include "OpenCLDrudeKernels.h"
-#include "OpenCLDrudeKernelSources.h"
+#include "CommonDrudeKernels.h"
+#include "CommonDrudeKernelSources.h"
 #include "openmm/internal/ContextImpl.h"
-#include "OpenCLBondedUtilities.h"
-#include "OpenCLForceInfo.h"
-#include "OpenCLIntegrationUtilities.h"
-#include "OpenCLKernelSources.h"
+#include "openmm/common/BondedUtilities.h"
+#include "openmm/common/ComputeForceInfo.h"
+#include "openmm/common/IntegrationUtilities.h"
+#include "CommonKernelSources.h"
 #include "SimTKOpenMMRealType.h"
 #include <set>

 using namespace OpenMM;
 using namespace std;

-static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
-    if (cl.getUseMixedPrecision())
-        kernel.setArg<cl::Buffer>(index, cl.getPosqCorrection().getDeviceBuffer());
-    else
-        kernel.setArg<void*>(index, NULL);
-}
-
-class OpenCLDrudeForceInfo : public OpenCLForceInfo {
+class CommonDrudeForceInfo : public ComputeForceInfo {
 public:
-    OpenCLDrudeForceInfo(const DrudeForce& force) : OpenCLForceInfo(0), force(force) {
+    CommonDrudeForceInfo(const DrudeForce& force) : force(force) {
    }
    int getNumParticleGroups() {
        return force.getNumParticles()+force.getNumScreenedPairs();
@@ -107,15 +100,16 @@ private:
    const DrudeForce& force;
 };

-void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
-    if (cl.getContextIndex() != 0)
+void CommonCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
+    cc.setAsCurrent();
+    if (cc.getContextIndex() != 0)
        return; // This is run entirely on one device
    int numParticles = force.getNumParticles();
    if (numParticles > 0) {
        // Create the harmonic interaction.
        
        vector<vector<int> > atoms(numParticles, vector<int>(5));
-        particleParams.initialize<mm_float4>(cl, numParticles, "drudeParticleParams");
+        particleParams.initialize<mm_float4>(cc, numParticles, "drudeParticleParams");
        vector<mm_float4> paramVector(numParticles);
        for (int i = 0; i < numParticles; i++) {
            double charge, polarizability, aniso12, aniso34;
@@ -139,15 +133,15 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
        }
        particleParams.upload(paramVector);
        map<string, string> replacements;
-        replacements["PARAMS"] = cl.getBondedUtilities().addArgument(particleParams.getDeviceBuffer(), "float4");
-        cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
+        replacements["PARAMS"] = cc.getBondedUtilities().addArgument(particleParams, "float4");
+        cc.getBondedUtilities().addInteraction(atoms, cc.replaceStrings(CommonDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
    }
    int numPairs = force.getNumScreenedPairs();
    if (numPairs > 0) {
        // Create the screened interaction between dipole pairs.
        
        vector<vector<int> > atoms(numPairs, vector<int>(4));
-        pairParams.initialize<mm_float2>(cl, numPairs, "drudePairParams");
+        pairParams.initialize<mm_float2>(cc, numPairs, "drudePairParams");
        vector<mm_float2> paramVector(numPairs);
        for (int i = 0; i < numPairs; i++) {
            int drude1, drude2;
@@ -163,18 +157,18 @@ void OpenCLCalcDrudeForceKernel::initialize(const System& system, const DrudeFor
        }
        pairParams.upload(paramVector);
        map<string, string> replacements;
-        replacements["PARAMS"] = cl.getBondedUtilities().addArgument(pairParams.getDeviceBuffer(), "float2");
-        cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
+        replacements["PARAMS"] = cc.getBondedUtilities().addArgument(pairParams, "float2");
+        cc.getBondedUtilities().addInteraction(atoms, cc.replaceStrings(CommonDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
    }
-    cl.addForce(new OpenCLDrudeForceInfo(force));
+    cc.addForce(new CommonDrudeForceInfo(force));
 }

-double OpenCLCalcDrudeForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
+double CommonCalcDrudeForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    return 0.0;
 }

-void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, const DrudeForce& force) {
-    if (cl.getContextIndex() != 0)
+void CommonCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, const DrudeForce& force) {
+    if (cc.getContextIndex() != 0)
        return; // This is run entirely on one device
    
    // Set the particle parameters.
@@ -226,9 +220,9 @@ void OpenCLCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, c
    }
 }

-void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
-    cl.getPlatformData().initializeContexts(system);
-    cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
+void CommonIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
+    cc.initializeContexts();
+    cc.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
    
    // Identify particle pairs and ordinary particles.
    
@@ -246,8 +240,8 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
        pairParticleVec.push_back(mm_int2(p, p1));
    }
    normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
-    normalParticles.initialize<int>(cl, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
-    pairParticles.initialize<cl_int2>(cl, max((int) pairParticleVec.size(), 1), "drudePairParticles");
+    normalParticles.initialize<int>(cc, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
+    pairParticles.initialize<mm_int2>(cc, max((int) pairParticleVec.size(), 1), "drudePairParticles");
    if (normalParticleVec.size() > 0)
        normalParticles.upload(normalParticleVec);
    if (pairParticleVec.size() > 0)
@@ -256,61 +250,67 @@ void OpenCLIntegrateDrudeLangevinStepKernel::initialize(const System& system, co
    // Create kernels.
    
    map<string, string> defines;
-    defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
-    defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
-    defines["NUM_NORMAL_PARTICLES"] = cl.intToString(normalParticleVec.size());
-    defines["NUM_PAIRS"] = cl.intToString(pairParticleVec.size());
+    defines["NUM_ATOMS"] = cc.intToString(cc.getNumAtoms());
+    defines["PADDED_NUM_ATOMS"] = cc.intToString(cc.getPaddedNumAtoms());
+    defines["NUM_NORMAL_PARTICLES"] = cc.intToString(normalParticleVec.size());
+    defines["NUM_PAIRS"] = cc.intToString(pairParticleVec.size());
    map<string, string> replacements;
-    cl::Program program = cl.createProgram(OpenCLDrudeKernelSources::drudeLangevin, defines, "");
-    kernel1 = cl::Kernel(program, "integrateDrudeLangevinPart1");
-    kernel2 = cl::Kernel(program, "integrateDrudeLangevinPart2");
-    hardwallKernel = cl::Kernel(program, "applyHardWallConstraints");
+    ComputeProgram program = cc.compileProgram(CommonDrudeKernelSources::drudeLangevin, defines);
+    kernel1 = program->createKernel("integrateDrudeLangevinPart1");
+    kernel2 = program->createKernel("integrateDrudeLangevinPart2");
+    hardwallKernel = program->createKernel("applyHardWallConstraints");
    prevStepSize = -1.0;
 }

-void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
-    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
-    int numAtoms = cl.getNumAtoms();
+void CommonIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
+    cc.setAsCurrent();
+    IntegrationUtilities& integration = cc.getIntegrationUtilities();
+    int numAtoms = cc.getNumAtoms();
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
-        kernel1.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(3, normalParticles.getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(4, pairParticles.getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(5, integration.getStepSize().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(12, integration.getRandom().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
-        if (cl.getUseMixedPrecision())
-            kernel2.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
+        kernel1->addArg(cc.getVelm());
+        kernel1->addArg(cc.getLongForceBuffer());
+        kernel1->addArg(integration.getPosDelta());
+        kernel1->addArg(normalParticles);
+        kernel1->addArg(pairParticles);
+        kernel1->addArg(integration.getStepSize());
+        for (int i = 0; i < 6; i++)
+            kernel1->addArg();
+        kernel1->addArg(integration.getRandom());
+        kernel1->addArg();
+        kernel2->addArg(cc.getPosq());
+        if (cc.getUseMixedPrecision())
+            kernel2->addArg(cc.getPosqCorrection());
        else
-            kernel2.setArg<void*>(1, NULL);
-        kernel2.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(3, cl.getVelm().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
-        hardwallKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
-        if (cl.getUseMixedPrecision())
-            hardwallKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
+            kernel2->addArg(NULL);
+        kernel2->addArg(integration.getPosDelta());
+        kernel2->addArg(cc.getVelm());
+        kernel2->addArg(integration.getStepSize());
+        hardwallKernel->addArg(cc.getPosq());
+        if (cc.getUseMixedPrecision())
+            hardwallKernel->addArg(cc.getPosqCorrection());
        else
-            hardwallKernel.setArg<void*>(1, NULL);
-        hardwallKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
-        hardwallKernel.setArg<cl::Buffer>(3, pairParticles.getDeviceBuffer());
-        hardwallKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
+            hardwallKernel->addArg(NULL);
+        hardwallKernel->addArg(cc.getVelm());
+        hardwallKernel->addArg(pairParticles);
+        hardwallKernel->addArg(integration.getStepSize());
+        hardwallKernel->addArg();
+        hardwallKernel->addArg();
    }
    
    // Compute integrator coefficients.
    
    double stepSize = integrator.getStepSize();
    double vscale = exp(-stepSize*integrator.getFriction());
-    double fscale = (1-vscale)/integrator.getFriction();
+    double fscale = (1-vscale)/integrator.getFriction()/(double) 0x100000000;
    double noisescale = sqrt(2*BOLTZ*integrator.getTemperature()*integrator.getFriction())*sqrt(0.5*(1-vscale*vscale)/integrator.getFriction());
    double vscaleDrude = exp(-stepSize*integrator.getDrudeFriction());
-    double fscaleDrude = (1-vscaleDrude)/integrator.getDrudeFriction();
+    double fscaleDrude = (1-vscaleDrude)/integrator.getDrudeFriction()/(double) 0x100000000;
    double noisescaleDrude = sqrt(2*BOLTZ*integrator.getDrudeTemperature()*integrator.getDrudeFriction())*sqrt(0.5*(1-vscaleDrude*vscaleDrude)/integrator.getDrudeFriction());
    double maxDrudeDistance = integrator.getMaxDrudeDistance();
    double hardwallscaleDrude = sqrt(BOLTZ*integrator.getDrudeTemperature());
    if (stepSize != prevStepSize) {
-        if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
+        if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()) {
            mm_double2 ss = mm_double2(0, stepSize);
            integration.getStepSize().upload(&ss);
        }
@@ -320,31 +320,31 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const
        }
        prevStepSize = stepSize;
    }
-    if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
-            kernel1.setArg<cl_double>(6, vscale);
-            kernel1.setArg<cl_double>(7, fscale);
-            kernel1.setArg<cl_double>(8, noisescale);
-            kernel1.setArg<cl_double>(9, vscaleDrude);
-            kernel1.setArg<cl_double>(10, fscaleDrude);
-            kernel1.setArg<cl_double>(11, noisescaleDrude);
-            hardwallKernel.setArg<cl_double>(5, maxDrudeDistance);
-            hardwallKernel.setArg<cl_double>(6, hardwallscaleDrude);
+    if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()) {
+            kernel1->setArg(6, vscale);
+            kernel1->setArg(7, fscale);
+            kernel1->setArg(8, noisescale);
+            kernel1->setArg(9, vscaleDrude);
+            kernel1->setArg(10, fscaleDrude);
+            kernel1->setArg(11, noisescaleDrude);
+            hardwallKernel->setArg(5, maxDrudeDistance);
+            hardwallKernel->setArg(6, hardwallscaleDrude);
    }
    else {
-            kernel1.setArg<cl_float>(6, (cl_float) vscale);
-            kernel1.setArg<cl_float>(7, (cl_float) fscale);
-            kernel1.setArg<cl_float>(8, (cl_float) noisescale);
-            kernel1.setArg<cl_float>(9, (cl_float) vscaleDrude);
-            kernel1.setArg<cl_float>(10, (cl_float) fscaleDrude);
-            kernel1.setArg<cl_float>(11, (cl_float) noisescaleDrude);
-            hardwallKernel.setArg<cl_float>(5, (cl_float) maxDrudeDistance);
-            hardwallKernel.setArg<cl_float>(6, (cl_float) hardwallscaleDrude);
+            kernel1->setArg(6, (float) vscale);
+            kernel1->setArg(7, (float) fscale);
+            kernel1->setArg(8, (float) noisescale);
+            kernel1->setArg(9, (float) vscaleDrude);
+            kernel1->setArg(10, (float) fscaleDrude);
+            kernel1->setArg(11, (float) noisescaleDrude);
+            hardwallKernel->setArg(5, (float) maxDrudeDistance);
+            hardwallKernel->setArg(6, (float) hardwallscaleDrude);
    }

    // Call the first integration kernel.

-    kernel1.setArg<cl_uint>(13, integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize()));
-    cl.executeKernel(kernel1, numAtoms);
+    kernel1->setArg(13, integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize()));
+    kernel1->execute(numAtoms);

    // Apply constraints.

@@ -352,32 +352,33 @@ void OpenCLIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const

    // Call the second integration kernel.

-    cl.executeKernel(kernel2, numAtoms);
+    kernel2->execute(numAtoms);
    
    // Apply hard wall constraints.
    
    if (maxDrudeDistance > 0)
-        cl.executeKernel(hardwallKernel, pairParticles.getSize());
+        hardwallKernel->execute(pairParticles.getSize());
    integration.computeVirtualSites();

    // Update the time and step count.

-    cl.setTime(cl.getTime()+stepSize);
-    cl.setStepCount(cl.getStepCount()+1);
-    cl.reorderAtoms();
+    cc.setTime(cc.getTime()+stepSize);
+    cc.setStepCount(cc.getStepCount()+1);
+    cc.reorderAtoms();
 }

-double OpenCLIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
-    return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
+double CommonIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
+    return cc.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
 }

-OpenCLIntegrateDrudeSCFStepKernel::~OpenCLIntegrateDrudeSCFStepKernel() {
+CommonIntegrateDrudeSCFStepKernel::~CommonIntegrateDrudeSCFStepKernel() {
    if (minimizerPos != NULL)
        lbfgs_free(minimizerPos);
 }

-void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
-    cl.getPlatformData().initializeContexts(system);
+void CommonIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
+    cc.initializeContexts();
+    cc.setAsCurrent();

    // Identify Drude particles.
    
@@ -398,49 +399,53 @@ void OpenCLIntegrateDrudeSCFStepKernel::initialize(const System& system, const D

    // Create the kernels.
    
-    cl::Program program = cl.createProgram(OpenCLKernelSources::verlet, "");
-    kernel1 = cl::Kernel(program, "integrateVerletPart1");
-    kernel2 = cl::Kernel(program, "integrateVerletPart2");
+    ComputeProgram program = cc.compileProgram(CommonKernelSources::verlet);
+    kernel1 = program->createKernel("integrateVerletPart1");
+    kernel2 = program->createKernel("integrateVerletPart2");
    prevStepSize = -1.0;
 }

-void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
-    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
-    int numAtoms = cl.getNumAtoms();
+void CommonIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
+    cc.setAsCurrent();
+    IntegrationUtilities& integration = cc.getIntegrationUtilities();
+    int numAtoms = cc.getNumAtoms();
    double dt = integrator.getStepSize();
    if (!hasInitializedKernels) {
        hasInitializedKernels = true;
-        kernel1.setArg<cl_int>(0, numAtoms);
-        kernel1.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
-        setPosqCorrectionArg(cl, kernel1, 3);
-        kernel1.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(5, cl.getForce().getDeviceBuffer());
-        kernel1.setArg<cl::Buffer>(6, integration.getPosDelta().getDeviceBuffer());
-        kernel2.setArg<cl_int>(0, numAtoms);
-        kernel2.setArg<cl::Buffer>(1, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
-        setPosqCorrectionArg(cl, kernel2, 3);
-        kernel2.setArg<cl::Buffer>(4, cl.getVelm().getDeviceBuffer());
-        kernel2.setArg<cl::Buffer>(5, integration.getPosDelta().getDeviceBuffer());
+        kernel1->addArg(numAtoms);
+        kernel1->addArg(cc.getPaddedNumAtoms());
+        kernel1->addArg(cc.getIntegrationUtilities().getStepSize());
+        kernel1->addArg(cc.getPosq());
+        kernel1->addArg(cc.getVelm());
+        kernel1->addArg(cc.getLongForceBuffer());
+        kernel1->addArg(integration.getPosDelta());
+        if (cc.getUseMixedPrecision())
+            kernel1->addArg(cc.getPosqCorrection());
+        kernel2->addArg(numAtoms);
+        kernel2->addArg(cc.getIntegrationUtilities().getStepSize());
+        kernel2->addArg(cc.getPosq());
+        kernel2->addArg(cc.getVelm());
+        kernel2->addArg(integration.getPosDelta());
+        if (cc.getUseMixedPrecision())
+            kernel2->addArg(cc.getPosqCorrection());
    }
    if (dt != prevStepSize) {
-        if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
+        if (cc.getUseDoublePrecision() || cc.getUseMixedPrecision()) {
            vector<mm_double2> stepSizeVec(1);
            stepSizeVec[0] = mm_double2(dt, dt);
-            cl.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
+            cc.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
        }
        else {
            vector<mm_float2> stepSizeVec(1);
-            stepSizeVec[0] = mm_float2((cl_float) dt, (cl_float) dt);
-            cl.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
+            stepSizeVec[0] = mm_float2((float) dt, (float) dt);
+            cc.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
        }
        prevStepSize = dt;
    }

    // Call the first integration kernel.

-    cl.executeKernel(kernel1, numAtoms);
+    kernel1->execute(numAtoms);

    // Apply constraints.

@@ -448,7 +453,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud

    // Call the second integration kernel.

-    cl.executeKernel(kernel2, numAtoms);
+    kernel2->execute(numAtoms);

    // Update the positions of virtual sites and Drude particles.

@@ -457,40 +462,40 @@ void OpenCLIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const Drud

    // Update the time and step count.

-    cl.setTime(cl.getTime()+dt);
-    cl.setStepCount(cl.getStepCount()+1);
-    cl.reorderAtoms();
+    cc.setTime(cc.getTime()+dt);
+    cc.setStepCount(cc.getStepCount()+1);
+    cc.reorderAtoms();
    
    // Reduce UI lag.
    
 #ifdef WIN32
-    cl.getQueue().flush();
+    cc.flushQueue();
 #endif
 }

-double OpenCLIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
-    return cl.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
+double CommonIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
+    return cc.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
 }

 struct MinimizerData {
    ContextImpl& context;
-    OpenCLContext& cl;
+    ComputeContext& cc;
    vector<int>& drudeParticles;
-    MinimizerData(ContextImpl& context, OpenCLContext& cl, vector<int>& drudeParticles) : context(context), cl(cl), drudeParticles(drudeParticles) {}
+    MinimizerData(ContextImpl& context, ComputeContext& cc, vector<int>& drudeParticles) : context(context), cc(cc), drudeParticles(drudeParticles) {}
 };

 static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
    MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
    ContextImpl& context = data->context;
-    OpenCLContext& cl = data->cl;
+    ComputeContext& cc = data->cc;
    vector<int>& drudeParticles = data->drudeParticles;
    int numDrudeParticles = drudeParticles.size();

    // Set the particle positions.
    
-    cl.getPosq().download(cl.getPinnedBuffer());
-    if (cl.getUseDoublePrecision()) {
-        mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
+    cc.getPosq().download(cc.getPinnedBuffer());
+    if (cc.getUseDoublePrecision()) {
+        mm_double4* posq = (mm_double4*) cc.getPinnedBuffer();
        for (int i = 0; i < numDrudeParticles; ++i) {
            mm_double4& p = posq[drudeParticles[i]];
            p.x = x[3*i];
@@ -499,7 +504,7 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
        }
    }
    else {
-        mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
+        mm_float4* posq = (mm_float4*) cc.getPinnedBuffer();
        for (int i = 0; i < numDrudeParticles; ++i) {
            mm_float4& p = posq[drudeParticles[i]];
            p.x = x[3*i];
@@ -507,40 +512,31 @@ static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsf
            p.z = x[3*i+2];
        }
    }
-    cl.getPosq().upload(cl.getPinnedBuffer());
+    cc.getPosq().upload(cc.getPinnedBuffer());

    // Compute the forces and energy for this configuration.

    double energy = context.calcForcesAndEnergy(true, true);
-    cl.getForce().download(cl.getPinnedBuffer());
-    if (cl.getUseDoublePrecision()) {
-        mm_double4* force = (mm_double4*) cl.getPinnedBuffer();
-        for (int i = 0; i < numDrudeParticles; ++i) {
-            int index = drudeParticles[i];
-            g[3*i] = -force[index].x;
-            g[3*i+1] = -force[index].y;
-            g[3*i+2] = -force[index].z;
-        }
-    }
-    else {
-        mm_float4* force = (mm_float4*) cl.getPinnedBuffer();
-        for (int i = 0; i < numDrudeParticles; ++i) {
-            int index = drudeParticles[i];
-            g[3*i] = -force[index].x;
-            g[3*i+1] = -force[index].y;
-            g[3*i+2] = -force[index].z;
-        }
+    long long* force = (long long*) cc.getPinnedBuffer();
+    cc.getLongForceBuffer().download(force);
+    double forceScale = -1.0/0x100000000;
+    int paddedNumAtoms = cc.getPaddedNumAtoms();
+    for (int i = 0; i < numDrudeParticles; ++i) {
+        int index = drudeParticles[i];
+        g[3*i] = forceScale*force[index];
+        g[3*i+1] = forceScale*force[index+paddedNumAtoms];
+        g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
    }
    return energy;
 }

-void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
+void CommonIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
    // Record the initial positions.

    int numDrudeParticles = drudeParticles.size();
-    cl.getPosq().download(cl.getPinnedBuffer());
-    if (cl.getUseDoublePrecision()) {
-        mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
+    cc.getPosq().download(cc.getPinnedBuffer());
+    if (cc.getUseDoublePrecision()) {
+        mm_double4* posq = (mm_double4*) cc.getPinnedBuffer();
        for (int i = 0; i < numDrudeParticles; ++i) {
            mm_double4 p = posq[drudeParticles[i]];
            minimizerPos[3*i] = p.x;
@@ -549,7 +545,7 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to
        }
    }
    else {
-        mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
+        mm_float4* posq = (mm_float4*) cc.getPinnedBuffer();
        for (int i = 0; i < numDrudeParticles; ++i) {
            mm_float4 p = posq[drudeParticles[i]];
            minimizerPos[3*i] = p.x;
@@ -571,6 +567,6 @@ void OpenCLIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double to
    // Perform the minimization.

    lbfgsfloatval_t fx;
-    MinimizerData data(context, cl, drudeParticles);
+    MinimizerData data(context, cc, drudeParticles);
    lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
 }
\ No newline at end of file
--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
-#ifndef OPENCL_DRUDE_KERNELS_H_
-#define OPENCL_DRUDE_KERNELS_H_
+#ifndef COMMON_DRUDE_KERNELS_H_
+#define COMMON_DRUDE_KERNELS_H_

 /* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2019 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -33,8 +33,8 @@
 * -------------------------------------------------------------------------- */

 #include "openmm/DrudeKernels.h"
-#include "OpenCLContext.h"
-#include "OpenCLArray.h"
+#include "openmm/common/ComputeContext.h"
+#include "openmm/common/ComputeArray.h"
 #include "lbfgs.h"

 namespace OpenMM {
@@ -42,10 +42,10 @@ namespace OpenMM {
 /**
 * This kernel is invoked by DrudeForce to calculate the forces acting on the system and the energy of the system.
 */
-class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel {
+class CommonCalcDrudeForceKernel : public CalcDrudeForceKernel {
 public:
-    OpenCLCalcDrudeForceKernel(const std::string& name, const Platform& platform, OpenCLContext& cl) :
-            CalcDrudeForceKernel(name, platform), cl(cl) {
+    CommonCalcDrudeForceKernel(const std::string& name, const Platform& platform, ComputeContext& cc) :
+            CalcDrudeForceKernel(name, platform), cc(cc) {
    }
    /**
     * Initialize the kernel.
@@ -71,18 +71,18 @@ public:
     */
    void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
 private:
-    OpenCLContext& cl;
-    OpenCLArray particleParams;
-    OpenCLArray pairParams;
+    ComputeContext& cc;
+    ComputeArray particleParams;
+    ComputeArray pairParams;
 };

 /**
 * This kernel is invoked by DrudeLangevinIntegrator to take one time step.
 */
-class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
+class CommonIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
 public:
-    OpenCLIntegrateDrudeLangevinStepKernel(const std::string& name, const Platform& platform, OpenCLContext& cl) :
-            IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
+    CommonIntegrateDrudeLangevinStepKernel(const std::string& name, const Platform& platform, ComputeContext& cc) :
+            IntegrateDrudeLangevinStepKernel(name, platform), cc(cc), hasInitializedKernels(false) {
    }
    /**
     * Initialize the kernel.
@@ -107,23 +107,23 @@ public:
     */
    double computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
 private:
-    OpenCLContext& cl;
-    bool hasInitializedKernels;
+    ComputeContext& cc;
    double prevStepSize;
-    OpenCLArray normalParticles;
-    OpenCLArray pairParticles;
-    cl::Kernel kernel1, kernel2, hardwallKernel;
+    bool hasInitializedKernels;
+    ComputeArray normalParticles;
+    ComputeArray pairParticles;
+    ComputeKernel kernel1, kernel2, hardwallKernel;
 };

 /**
 * This kernel is invoked by DrudeSCFIntegrator to take one time step
 */
-class OpenCLIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
+class CommonIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
 public:
-    OpenCLIntegrateDrudeSCFStepKernel(const std::string& name, const Platform& platform, OpenCLContext& cl) :
-            IntegrateDrudeSCFStepKernel(name, platform), cl(cl), hasInitializedKernels(false), minimizerPos(NULL) {
+    CommonIntegrateDrudeSCFStepKernel(const std::string& name, const Platform& platform, ComputeContext& cc) :
+            IntegrateDrudeSCFStepKernel(name, platform), cc(cc), hasInitializedKernels(false), minimizerPos(NULL) { // initializers follow member declaration order
    }
-    ~OpenCLIntegrateDrudeSCFStepKernel();
+    ~CommonIntegrateDrudeSCFStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -148,15 +148,15 @@ public:
    double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
 private:
    void minimize(ContextImpl& context, double tolerance);
-    OpenCLContext& cl;
-    bool hasInitializedKernels;
+    ComputeContext& cc;
    double prevStepSize;
+    bool hasInitializedKernels;
    std::vector<int> drudeParticles;
    lbfgsfloatval_t *minimizerPos;
    lbfgs_parameter_t minimizerParams;
-    cl::Kernel kernel1, kernel2;
+    ComputeKernel kernel1, kernel2;
 };

 } // namespace OpenMM

-#endif /*OPENCL_DRUDE_KERNELS_H_*/
+#endif /*COMMON_DRUDE_KERNELS_H_*/
--- a/plugins/drude/platforms/cuda/src/kernels/drudeLangevin.cu
+++ b/plugins/drude/platforms/cuda/src/kernels/drudeLangevin.cu
@@ -2,14 +2,14 @@
 * Perform the first step of Langevin integration.
 */

-extern "C" __global__ void integrateDrudeLangevinPart1(mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta,
-        const int* __restrict__ normalParticles, const int2* __restrict__ pairParticles, const mixed2* __restrict__ dt, mixed vscale, mixed fscale,
-        mixed noisescale, mixed vscaleDrude, mixed fscaleDrude, mixed noisescaleDrude, const float4* __restrict__ random, unsigned int randomIndex) {
+KERNEL void integrateDrudeLangevinPart1(GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL mixed4* RESTRICT posDelta,
+        GLOBAL const int* RESTRICT normalParticles, GLOBAL const int2* RESTRICT pairParticles, GLOBAL const mixed2* RESTRICT dt, mixed vscale, mixed fscale,
+        mixed noisescale, mixed vscaleDrude, mixed fscaleDrude, mixed noisescaleDrude, GLOBAL const float4* RESTRICT random, unsigned int randomIndex) {
    mixed stepSize = dt[0].y;
    
    // Update normal particles.

-    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_NORMAL_PARTICLES; i += blockDim.x*gridDim.x) {
+    for (int i = GLOBAL_ID; i < NUM_NORMAL_PARTICLES; i += GLOBAL_SIZE) {
        int index = normalParticles[i];
        mixed4 velocity = velm[index];
        if (velocity.w != 0) {
@@ -26,7 +26,7 @@ extern "C" __global__ void integrateDrudeLangevinPart1(mixed4* __restrict__ velm
    // Update Drude particle pairs.
    
    randomIndex += NUM_NORMAL_PARTICLES;
-    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_PAIRS; i += blockDim.x*gridDim.x) {
+    for (int i = GLOBAL_ID; i < NUM_PAIRS; i += GLOBAL_SIZE) {
        int2 particles = pairParticles[i];
        mixed4 velocity1 = velm[particles.x];
        mixed4 velocity2 = velm[particles.y];
@@ -69,14 +69,17 @@ extern "C" __global__ void integrateDrudeLangevinPart1(mixed4* __restrict__ velm
 * Perform the second step of Langevin integration.
 */

-extern "C" __global__ void integrateDrudeLangevinPart2(real4* __restrict__ posq, real4* __restrict__ posqCorrection, const mixed4* __restrict__ posDelta, mixed4* __restrict__ velm, const mixed2* __restrict__ dt) {
+KERNEL void integrateDrudeLangevinPart2(GLOBAL real4* RESTRICT posq, GLOBAL real4* RESTRICT posqCorrection, GLOBAL const mixed4* RESTRICT posDelta, GLOBAL mixed4* RESTRICT velm, GLOBAL const mixed2* RESTRICT dt) {
+#ifdef SUPPORTS_DOUBLE_PRECISION
    double invStepSize = 1.0/dt[0].y;
-    int index = blockIdx.x*blockDim.x+threadIdx.x;
+#else
+    float invStepSize = 1.0f/dt[0].y;
+#endif
+    int index = GLOBAL_ID;
    while (index < NUM_ATOMS) {
        mixed4 vel = velm[index];
        if (vel.w != 0) {
 #ifdef USE_MIXED_PRECISION
- 
            real4 pos1 = posq[index];
            real4 pos2 = posqCorrection[index];
            mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
@@ -98,17 +101,17 @@ extern "C" __global__ void integrateDrudeLangevinPart2(real4* __restrict__ posq,
 #endif
            velm[index] = vel;
        }
-        index += blockDim.x*gridDim.x;
+        index += GLOBAL_SIZE;
    }
 }

 /**
 * Apply hard wall constraints
 */
-extern "C" __global__ void applyHardWallConstraints(real4* __restrict__ posq, real4* __restrict__ posqCorrection, mixed4* __restrict__ velm,
-        const int2* __restrict__ pairParticles, const mixed2* __restrict__ dt, mixed maxDrudeDistance, mixed hardwallscaleDrude) {
+KERNEL void applyHardWallConstraints(GLOBAL real4* RESTRICT posq, GLOBAL real4* RESTRICT posqCorrection, GLOBAL mixed4* RESTRICT velm,
+        GLOBAL const int2* RESTRICT pairParticles, GLOBAL const mixed2* RESTRICT dt, mixed maxDrudeDistance, mixed hardwallscaleDrude) {
    mixed stepSize = dt[0].y;
-    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_PAIRS; i += blockDim.x*gridDim.x) {
+    for (int i = GLOBAL_ID; i < NUM_PAIRS; i += GLOBAL_SIZE) {
        int2 particles = pairParticles[i];
 #ifdef USE_MIXED_PRECISION
        real4 posReal1 = posq[particles.x];

--- a/plugins/drude/platforms/cuda/src/kernels/drudePairForce.cu
+++ b/plugins/drude/platforms/cuda/src/kernels/drudePairForce.cu
--- a/plugins/drude/platforms/cuda/src/kernels/drudeParticleForce.cu
+++ b/plugins/drude/platforms/cuda/src/kernels/drudeParticleForce.cu
--- a/plugins/drude/platforms/cuda/CMakeLists.txt
+++ b/plugins/drude/platforms/cuda/CMakeLists.txt
@@ -12,7 +12,7 @@

 # The source is organized into subdirectories, but we handle them all from
 # this CMakeLists file rather than letting CMake visit them as SUBDIRS.
-SET(OPENMM_SOURCE_SUBDIRS .)
+SET(OPENMM_SOURCE_SUBDIRS . ../common)


 # Collect up information about the version of the OpenMM library we're building
@@ -59,32 +59,25 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
    INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
 ENDFOREACH(subdir)

+SET(COMMON_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/../common/src/CommonDrudeKernelSources.cpp)
+SET(SOURCE_FILES ${SOURCE_FILES} ${COMMON_KERNELS_CPP})
+
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/../common/src)
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/include)
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/src)
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/cuda/src)
-
-# Set variables needed for encoding kernel sources into a C++ class
-
-SET(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
-SET(CUDA_SOURCE_CLASS CudaDrudeKernelSources)
-SET(CUDA_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.cpp)
-SET(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
-SET(SOURCE_FILES ${SOURCE_FILES} ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
-INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/common/include)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/common/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/../common/src)

 # Create the library

 INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE})

-FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
-ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
-    COMMAND ${CMAKE_COMMAND}
-    ARGS -D CUDA_SOURCE_DIR=${CUDA_SOURCE_DIR} -D CUDA_KERNELS_CPP=${CUDA_KERNELS_CPP} -D CUDA_KERNELS_H=${CUDA_KERNELS_H} -D CUDA_SOURCE_CLASS=${CUDA_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/cuda/EncodeCUDAFiles.cmake
-    DEPENDS ${CUDA_KERNELS}
-)
-SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)
+SET_SOURCE_FILES_PROPERTIES(${COMMON_KERNELS_CPP} PROPERTIES GENERATED TRUE)
 ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
+ADD_DEPENDENCIES(${SHARED_TARGET} DrudeCommonKernels)

 TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} ${PTHREADS_LIB})
 TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME}CUDA)

--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2012 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2019 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -27,7 +27,8 @@
 #include <exception>

 #include "CudaDrudeKernelFactory.h"
-#include "CudaDrudeKernels.h"
+#include "CommonDrudeKernels.h"
+#include "CudaContext.h"
 #include "openmm/internal/windowsExport.h"
 #include "openmm/internal/ContextImpl.h"
 #include "openmm/OpenMMException.h"
@@ -63,10 +64,10 @@ extern "C" OPENMM_EXPORT void registerDrudeCudaKernelFactories() {
 KernelImpl* CudaDrudeKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
    CudaContext& cu = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData())->contexts[0];
    if (name == CalcDrudeForceKernel::Name())
-        return new CudaCalcDrudeForceKernel(name, platform, cu);
+        return new CommonCalcDrudeForceKernel(name, platform, cu);
    if (name == IntegrateDrudeLangevinStepKernel::Name())
-        return new CudaIntegrateDrudeLangevinStepKernel(name, platform, cu);
+        return new CommonIntegrateDrudeLangevinStepKernel(name, platform, cu);
    if (name == IntegrateDrudeSCFStepKernel::Name())
-        return new CudaIntegrateDrudeSCFStepKernel(name, platform, cu);
+        return new CommonIntegrateDrudeSCFStepKernel(name, platform, cu);
    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
 }
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "CudaDrudeKernels.h"
-#include "CudaDrudeKernelSources.h"
-#include "openmm/internal/ContextImpl.h"
-#include "CudaBondedUtilities.h"
-#include "CudaForceInfo.h"
-#include "CudaIntegrationUtilities.h"
-#include "CudaKernelSources.h"
-#include "SimTKOpenMMRealType.h"
-#include <set>
-
-using namespace OpenMM;
-using namespace std;
-
-class CudaDrudeForceInfo : public CudaForceInfo {
-public:
-    CudaDrudeForceInfo(const DrudeForce& force) : force(force) {
-    }
-    int getNumParticleGroups() {
-        return force.getNumParticles()+force.getNumScreenedPairs();
-    }
-    void getParticlesInGroup(int index, vector<int>& particles) {
-        particles.clear();
-        if (index < force.getNumParticles()) {
-            int p, p1, p2, p3, p4;
-            double charge, polarizability, aniso12, aniso34;
-            force.getParticleParameters(index, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
-            particles.push_back(p);
-            particles.push_back(p1);
-            if (p2 != -1)
-                particles.push_back(p2);
-            if (p3 != -1)
-                particles.push_back(p3);
-            if (p4 != -1)
-                particles.push_back(p4);
-        }
-        else {
-            int drude1, drude2;
-            double thole;
-            force.getScreenedPairParameters(index-force.getNumParticles(), drude1, drude2, thole);
-            int p, p1, p2, p3, p4;
-            double charge, polarizability, aniso12, aniso34;
-            force.getParticleParameters(drude1, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
-            particles.push_back(p);
-            particles.push_back(p1);
-            force.getParticleParameters(drude2, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
-            particles.push_back(p);
-            particles.push_back(p1);
-        }
-    }
-    bool areGroupsIdentical(int group1, int group2) {
-        if (group1 < force.getNumParticles() && group2 < force.getNumParticles()) {
-            int p, p1, p2, p3, p4;
-            double charge1, polarizability1, aniso12_1, aniso34_1;
-            double charge2, polarizability2, aniso12_2, aniso34_2;
-            force.getParticleParameters(group1, p, p1, p2, p3, p4, charge1, polarizability1, aniso12_1, aniso34_1);
-            force.getParticleParameters(group2, p, p1, p2, p3, p4, charge2, polarizability2, aniso12_2, aniso34_2);
-            return (charge1 == charge2 && polarizability1 == polarizability2 && aniso12_1 == aniso12_2 && aniso34_1 == aniso34_2);
-        }
-        if (group1 >= force.getNumParticles() && group2 >= force.getNumParticles()) {
-            int drude1, drude2;
-            double thole1, thole2;
-            force.getScreenedPairParameters(group1-force.getNumParticles(), drude1, drude2, thole1);
-            force.getScreenedPairParameters(group1-force.getNumParticles(), drude1, drude2, thole2);
-            return (thole1 == thole2);
-        }
-        return false;
-    }
-private:
-    const DrudeForce& force;
-};
-
-void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
-    cu.setAsCurrent();
-    if (cu.getContextIndex() != 0)
-        return; // This is run entirely on one device
-    int numParticles = force.getNumParticles();
-    if (numParticles > 0) {
-        // Create the harmonic interaction .
-        
-        vector<vector<int> > atoms(numParticles, vector<int>(5));
-        particleParams.initialize<float4>(cu, numParticles, "drudeParticleParams");
-        vector<float4> paramVector(numParticles);
-        for (int i = 0; i < numParticles; i++) {
-            double charge, polarizability, aniso12, aniso34;
-            force.getParticleParameters(i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], charge, polarizability, aniso12, aniso34);
-            double a1 = (atoms[i][2] == -1 ? 1 : aniso12);
-            double a2 = (atoms[i][3] == -1 || atoms[i][4] == -1 ? 1 : aniso34);
-            double a3 = 3-a1-a2;
-            double k3 = ONE_4PI_EPS0*charge*charge/(polarizability*a3);
-            double k1 = ONE_4PI_EPS0*charge*charge/(polarizability*a1) - k3;
-            double k2 = ONE_4PI_EPS0*charge*charge/(polarizability*a2) - k3;
-            if (atoms[i][2] == -1) {
-                atoms[i][2] = 0;
-                k1 = 0;
-            }
-            if (atoms[i][3] == -1 || atoms[i][4] == -1) {
-                atoms[i][3] = 0;
-                atoms[i][4] = 0;
-                k2 = 0;
-            }
-            paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
-        }
-        particleParams.upload(paramVector);
-        map<string, string> replacements;
-        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams.getDevicePointer(), "float4");
-        cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
-    }
-    int numPairs = force.getNumScreenedPairs();
-    if (numPairs > 0) {
-        // Create the screened interaction between dipole pairs.
-        
-        vector<vector<int> > atoms(numPairs, vector<int>(4));
-        pairParams.initialize<float2>(cu, numPairs, "drudePairParams");
-        vector<float2> paramVector(numPairs);
-        for (int i = 0; i < numPairs; i++) {
-            int drude1, drude2;
-            double thole;
-            force.getScreenedPairParameters(i, drude1, drude2, thole);
-            int p2, p3, p4;
-            double charge1, charge2, polarizability1, polarizability2, aniso12, aniso34;
-            force.getParticleParameters(drude1, atoms[i][0], atoms[i][1], p2, p3, p4, charge1, polarizability1, aniso12, aniso34);
-            force.getParticleParameters(drude2, atoms[i][2], atoms[i][3], p2, p3, p4, charge2, polarizability2, aniso12, aniso34);
-            double screeningScale = thole/pow(polarizability1*polarizability2, 1.0/6.0);
-            double energyScale = ONE_4PI_EPS0*charge1*charge2;
-            paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
-        }
-        pairParams.upload(paramVector);
-        map<string, string> replacements;
-        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams.getDevicePointer(), "float2");
-        cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
-    }
-    cu.addForce(new CudaDrudeForceInfo(force));
-}
-
-double CudaCalcDrudeForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    return 0.0;
-}
-
-void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, const DrudeForce& force) {
-    if (cu.getContextIndex() != 0)
-        return; // This is run entirely on one device
-    
-    // Set the particle parameters.
-    
-    int numParticles = force.getNumParticles();
-    if (numParticles > 0) {
-        if (!particleParams.isInitialized() || numParticles != particleParams.getSize())
-            throw OpenMMException("updateParametersInContext: The number of Drude particles has changed");
-        vector<float4> paramVector(numParticles);
-        for (int i = 0; i < numParticles; i++) {
-            int p, p1, p2, p3, p4;
-            double charge, polarizability, aniso12, aniso34;
-            force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
-            double a1 = (p2 == -1 ? 1 : aniso12);
-            double a2 = (p3 == -1 || p4 == -1 ? 1 : aniso34);
-            double a3 = 3-a1-a2;
-            double k3 = ONE_4PI_EPS0*charge*charge/(polarizability*a3);
-            double k1 = ONE_4PI_EPS0*charge*charge/(polarizability*a1) - k3;
-            double k2 = ONE_4PI_EPS0*charge*charge/(polarizability*a2) - k3;
-            if (p2 == -1)
-                k1 = 0;
-            if (p3 == -1 || p4 == -1)
-                k2 = 0;
-            paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
-        }
-        particleParams.upload(paramVector);
-    }
-    
-    // Set the pair parameters.
-    
-    int numPairs = force.getNumScreenedPairs();
-    if (numPairs > 0) {
-        if (!pairParams.isInitialized() || numPairs != pairParams.getSize())
-            throw OpenMMException("updateParametersInContext: The number of screened pairs has changed");
-        vector<float2> paramVector(numPairs);
-        for (int i = 0; i < numPairs; i++) {
-            int drude1, drude2;
-            double thole;
-            force.getScreenedPairParameters(i, drude1, drude2, thole);
-            int p, p1, p2, p3, p4;
-            double charge1, charge2, polarizability1, polarizability2, aniso12, aniso34;
-            force.getParticleParameters(drude1, p, p1, p2, p3, p4, charge1, polarizability1, aniso12, aniso34);
-            force.getParticleParameters(drude2, p, p1, p2, p3, p4, charge2, polarizability2, aniso12, aniso34);
-            double screeningScale = thole/pow(polarizability1*polarizability2, 1.0/6.0);
-            double energyScale = ONE_4PI_EPS0*charge1*charge2;
-            paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
-        }
-        pairParams.upload(paramVector);
-    }
-}
-
-void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
-    cu.getPlatformData().initializeContexts(system);
-    cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
-    
-    // Identify particle pairs and ordinary particles.
-    
-    set<int> particles;
-    vector<int> normalParticleVec;
-    vector<int2> pairParticleVec;
-    for (int i = 0; i < system.getNumParticles(); i++)
-        particles.insert(i);
-    for (int i = 0; i < force.getNumParticles(); i++) {
-        int p, p1, p2, p3, p4;
-        double charge, polarizability, aniso12, aniso34;
-        force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
-        particles.erase(p);
-        particles.erase(p1);
-        pairParticleVec.push_back(make_int2(p, p1));
-    }
-    normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
-    normalParticles.initialize<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
-    pairParticles.initialize<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles");
-    if (normalParticleVec.size() > 0)
-        normalParticles.upload(normalParticleVec);
-    if (pairParticleVec.size() > 0)
-        pairParticles.upload(pairParticleVec);
-
-    // Create kernels.
-    
-    map<string, string> defines;
-    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
-    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
-    defines["NUM_NORMAL_PARTICLES"] = cu.intToString(normalParticleVec.size());
-    defines["NUM_PAIRS"] = cu.intToString(pairParticleVec.size());
-    map<string, string> replacements;
-    CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaDrudeKernelSources::drudeLangevin, defines, "");
-    kernel1 = cu.getKernel(module, "integrateDrudeLangevinPart1");
-    kernel2 = cu.getKernel(module, "integrateDrudeLangevinPart2");
-    hardwallKernel = cu.getKernel(module, "applyHardWallConstraints");
-    prevStepSize = -1.0;
-}
-
-void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
-    cu.setAsCurrent();
-    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
-    int numAtoms = cu.getNumAtoms();
-    
-    // Compute integrator coefficients.
-    
-    double stepSize = integrator.getStepSize();
-    double vscale = exp(-stepSize*integrator.getFriction());
-    double fscale = (1-vscale)/integrator.getFriction()/(double) 0x100000000;
-    double noisescale = sqrt(2*BOLTZ*integrator.getTemperature()*integrator.getFriction())*sqrt(0.5*(1-vscale*vscale)/integrator.getFriction());
-    double vscaleDrude = exp(-stepSize*integrator.getDrudeFriction());
-    double fscaleDrude = (1-vscaleDrude)/integrator.getDrudeFriction()/(double) 0x100000000;
-    double noisescaleDrude = sqrt(2*BOLTZ*integrator.getDrudeTemperature()*integrator.getDrudeFriction())*sqrt(0.5*(1-vscaleDrude*vscaleDrude)/integrator.getDrudeFriction());
-    double maxDrudeDistance = integrator.getMaxDrudeDistance();
-    double hardwallscaleDrude = sqrt(BOLTZ*integrator.getDrudeTemperature());
-    if (stepSize != prevStepSize) {
-        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
-            double2 ss = make_double2(0, stepSize);
-            integration.getStepSize().upload(&ss);
-        }
-        else {
-            float2 ss = make_float2(0, (float) stepSize);
-            integration.getStepSize().upload(&ss);
-        }
-        prevStepSize = stepSize;
-    }
-    
-    // Create appropriate pointer for the precision mode.
-    
-    float vscaleFloat = (float) vscale;
-    float fscaleFloat = (float) fscale;
-    float noisescaleFloat = (float) noisescale;
-    float vscaleDrudeFloat = (float) vscaleDrude;
-    float fscaleDrudeFloat = (float) fscaleDrude;
-    float noisescaleDrudeFloat = (float) noisescaleDrude;
-    float maxDrudeDistanceFloat =(float) maxDrudeDistance;
-    float hardwallscaleDrudeFloat = (float) hardwallscaleDrude;
-    void *vscalePtr, *fscalePtr, *noisescalePtr, *vscaleDrudePtr, *fscaleDrudePtr, *noisescaleDrudePtr, *maxDrudeDistancePtr, *hardwallscaleDrudePtr;
-    if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
-        vscalePtr = &vscale;
-        fscalePtr = &fscale;
-        noisescalePtr = &noisescale;
-        vscaleDrudePtr = &vscaleDrude;
-        fscaleDrudePtr = &fscaleDrude;
-        noisescaleDrudePtr = &noisescaleDrude;
-        maxDrudeDistancePtr = &maxDrudeDistance;
-        hardwallscaleDrudePtr = &hardwallscaleDrude;
-    }
-    else {
-        vscalePtr = &vscaleFloat;
-        fscalePtr = &fscaleFloat;
-        noisescalePtr = &noisescaleFloat;
-        vscaleDrudePtr = &vscaleDrudeFloat;
-        fscaleDrudePtr = &fscaleDrudeFloat;
-        noisescaleDrudePtr = &noisescaleDrudeFloat;
-        maxDrudeDistancePtr = &maxDrudeDistanceFloat;
-        hardwallscaleDrudePtr = &hardwallscaleDrudeFloat;
-    }
-
-    // Call the first integration kernel.
-
-    int randomIndex = integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize());
-    void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
-            &normalParticles.getDevicePointer(), &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(),
-            vscalePtr, fscalePtr, noisescalePtr, vscaleDrudePtr, fscaleDrudePtr, noisescaleDrudePtr, &integration.getRandom().getDevicePointer(), &randomIndex};
-    cu.executeKernel(kernel1, args1, numAtoms);
-
-    // Apply constraints.
-
-    integration.applyConstraints(integrator.getConstraintTolerance());
-
-    // Call the second integration kernel.
-
-    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
-    void* args2[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &integration.getPosDelta().getDevicePointer(),
-            &cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
-    cu.executeKernel(kernel2, args2, numAtoms);
-    
-    // Apply hard wall constraints.
-    
-    if (maxDrudeDistance > 0) {
-        void* hardwallArgs[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(),
-                &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr};
-        cu.executeKernel(hardwallKernel, hardwallArgs, pairParticles.getSize());
-    }
-    integration.computeVirtualSites();
-
-    // Update the time and step count.
-
-    cu.setTime(cu.getTime()+stepSize);
-    cu.setStepCount(cu.getStepCount()+1);
-    cu.reorderAtoms();
-}
-
-double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
-    return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
-}
-
-CudaIntegrateDrudeSCFStepKernel::~CudaIntegrateDrudeSCFStepKernel() {
-    if (minimizerPos != NULL)
-        lbfgs_free(minimizerPos);
-}
-
-void CudaIntegrateDrudeSCFStepKernel::initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force) {
-    cu.getPlatformData().initializeContexts(system);
-    cu.setAsCurrent();
-
-    // Identify Drude particles.
-    
-    for (int i = 0; i < force.getNumParticles(); i++) {
-        int p, p1, p2, p3, p4;
-        double charge, polarizability, aniso12, aniso34;
-        force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
-        drudeParticles.push_back(p);
-    }
-    
-    // Initialize the energy minimizer.
-    
-    minimizerPos = lbfgs_malloc(drudeParticles.size()*3);
-    if (minimizerPos == NULL)
-        throw OpenMMException("DrudeSCFIntegrator: Failed to allocate memory");
-    lbfgs_parameter_init(&minimizerParams);
-    minimizerParams.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;    
-
-    // Create the kernels.
-    
-    map<string, string> defines;
-    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
-    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
-    CUmodule module = cu.createModule(CudaKernelSources::verlet, defines, "");
-    kernel1 = cu.getKernel(module, "integrateVerletPart1");
-    kernel2 = cu.getKernel(module, "integrateVerletPart2");
-    prevStepSize = -1.0;
-}
-
-void CudaIntegrateDrudeSCFStepKernel::execute(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
-    cu.setAsCurrent();
-    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
-    int numAtoms = cu.getNumAtoms();
-    int paddedNumAtoms = cu.getPaddedNumAtoms();
-    double dt = integrator.getStepSize();
-    if (dt != prevStepSize) {
-        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
-            vector<double2> stepSizeVec(1);
-            stepSizeVec[0] = make_double2(dt, dt);
-            cu.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
-        }
-        else {
-            vector<float2> stepSizeVec(1);
-            stepSizeVec[0] = make_float2((float) dt, (float) dt);
-            cu.getIntegrationUtilities().getStepSize().upload(stepSizeVec);
-        }
-        prevStepSize = dt;
-    }
-
-    // Call the first integration kernel.
-
-    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
-    void* args1[] = {&numAtoms, &paddedNumAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
-            &cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
-    cu.executeKernel(kernel1, args1, numAtoms);
-
-    // Apply constraints.
-
-    integration.applyConstraints(integrator.getConstraintTolerance());
-
-    // Call the second integration kernel.
-
-    void* args2[] = {&numAtoms, &cu.getIntegrationUtilities().getStepSize().getDevicePointer(), &cu.getPosq().getDevicePointer(), &posCorrection,
-            &cu.getVelm().getDevicePointer(), &integration.getPosDelta().getDevicePointer()};
-    cu.executeKernel(kernel2, args2, numAtoms);
-
-    // Update the positions of virtual sites and Drude particles.
-
-    integration.computeVirtualSites();
-    minimize(context, integrator.getMinimizationErrorTolerance());
-
-    // Update the time and step count.
-
-    cu.setTime(cu.getTime()+dt);
-    cu.setStepCount(cu.getStepCount()+1);
-    cu.reorderAtoms();
-}
-
-double CudaIntegrateDrudeSCFStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator) {
-    return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
-}
-
-struct MinimizerData {
-    ContextImpl& context;
-    CudaContext& cu;
-    vector<int>& drudeParticles;
-    MinimizerData(ContextImpl& context, CudaContext& cu, vector<int>& drudeParticles) : context(context), cu(cu), drudeParticles(drudeParticles) {}
-};
-
-static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) {
-    MinimizerData* data = reinterpret_cast<MinimizerData*>(instance);
-    ContextImpl& context = data->context;
-    CudaContext& cu = data->cu;
-    vector<int>& drudeParticles = data->drudeParticles;
-    int numDrudeParticles = drudeParticles.size();
-
-    // Set the particle positions.
-    
-    cu.getPosq().download(cu.getPinnedBuffer());
-    if (cu.getUseDoublePrecision()) {
-        double4* posq = (double4*) cu.getPinnedBuffer();
-        for (int i = 0; i < numDrudeParticles; ++i) {
-            double4& p = posq[drudeParticles[i]];
-            p.x = x[3*i];
-            p.y = x[3*i+1];
-            p.z = x[3*i+2];
-        }
-    }
-    else {
-        float4* posq = (float4*) cu.getPinnedBuffer();
-        for (int i = 0; i < numDrudeParticles; ++i) {
-            float4& p = posq[drudeParticles[i]];
-            p.x = x[3*i];
-            p.y = x[3*i+1];
-            p.z = x[3*i+2];
-        }
-    }
-    cu.getPosq().upload(cu.getPinnedBuffer());
-
-    // Compute the forces and energy for this configuration.
-
-    double energy = context.calcForcesAndEnergy(true, true);
-    long long* force = (long long*) cu.getPinnedBuffer();
-    cu.getForce().download(force);
-    double forceScale = -1.0/0x100000000;
-    int paddedNumAtoms = cu.getPaddedNumAtoms();
-    for (int i = 0; i < numDrudeParticles; ++i) {
-        int index = drudeParticles[i];
-        g[3*i] = forceScale*force[index];
-        g[3*i+1] = forceScale*force[index+paddedNumAtoms];
-        g[3*i+2] = forceScale*force[index+paddedNumAtoms*2];
-    }
-    return energy;
-}
-
-void CudaIntegrateDrudeSCFStepKernel::minimize(ContextImpl& context, double tolerance) {
-    // Record the initial positions.
-
-    int numDrudeParticles = drudeParticles.size();
-    cu.getPosq().download(cu.getPinnedBuffer());
-    if (cu.getUseDoublePrecision()) {
-        double4* posq = (double4*) cu.getPinnedBuffer();
-        for (int i = 0; i < numDrudeParticles; ++i) {
-            double4 p = posq[drudeParticles[i]];
-            minimizerPos[3*i] = p.x;
-            minimizerPos[3*i+1] = p.y;
-            minimizerPos[3*i+2] = p.z;
-        }
-    }
-    else {
-        float4* posq = (float4*) cu.getPinnedBuffer();
-        for (int i = 0; i < numDrudeParticles; ++i) {
-            float4 p = posq[drudeParticles[i]];
-            minimizerPos[3*i] = p.x;
-            minimizerPos[3*i+1] = p.y;
-            minimizerPos[3*i+2] = p.z;
-        }
-        minimizerParams.xtol = 1e-7;
-    }
-    
-    // Determine a normalization constant for scaling the tolerance.
-    
-    double norm = 0.0;
-    for (int i = 0; i < 3*numDrudeParticles; i++)
-        norm += minimizerPos[i]*minimizerPos[i];
-    norm /= numDrudeParticles;
-    norm = (norm < 1 ? 1 : sqrt(norm));
-    minimizerParams.epsilon = tolerance/norm;
-    
-    // Perform the minimization.
-
-    lbfgsfloatval_t fx;
-    MinimizerData data(context, cu, drudeParticles);
-    lbfgs(numDrudeParticles*3, minimizerPos, &fx, evaluate, NULL, &data, &minimizerParams);
-}
\ No newline at end of file
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
-#ifndef CUDA_DRUDE_KERNELS_H_
-#define CUDA_DRUDE_KERNELS_H_
-
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
- * Authors: Peter Eastman                                                     *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-
-#include "openmm/DrudeKernels.h"
-#include "CudaContext.h"
-#include "CudaArray.h"
-#include "lbfgs.h"
-
-namespace OpenMM {
-
-/**
- * This kernel is invoked by DrudeForce to calculate the forces acting on the system and the energy of the system.
- */
-class CudaCalcDrudeForceKernel : public CalcDrudeForceKernel {
-public:
-    CudaCalcDrudeForceKernel(const std::string& name, const Platform& platform, CudaContext& cu) :
-            CalcDrudeForceKernel(name, platform), cu(cu) {
-    }
-    /**
-     * Initialize the kernel.
-     * 
-     * @param system     the System this kernel will be applied to
-     * @param force      the DrudeForce this kernel will be used for
-     */
-    void initialize(const System& system, const DrudeForce& force);
-    /**
-     * Execute the kernel to calculate the forces and/or energy.
-     *
-     * @param context        the context in which to execute this kernel
-     * @param includeForces  true if forces should be calculated
-     * @param includeEnergy  true if the energy should be calculated
-     * @return the potential energy due to the force
-     */
-    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
-    /**
-     * Copy changed parameters over to a context.
-     *
-     * @param context    the context to copy parameters to
-     * @param force      the DrudeForce to copy the parameters from
-     */
-    void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
-private:
-    CudaContext& cu;
-    CudaArray particleParams;
-    CudaArray pairParams;
-};
-
-/**
- * This kernel is invoked by DrudeLangevinIntegrator to take one time step
- */
-class CudaIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
-public:
-    CudaIntegrateDrudeLangevinStepKernel(const std::string& name, const Platform& platform, CudaContext& cu) :
-            IntegrateDrudeLangevinStepKernel(name, platform), cu(cu) {
-    }
-    /**
-     * Initialize the kernel.
-     *
-     * @param system     the System this kernel will be applied to
-     * @param integrator the DrudeLangevinIntegrator this kernel will be used for
-     * @param force      the DrudeForce to get particle parameters from
-     */
-    void initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force);
-    /**
-     * Execute the kernel.
-     *
-     * @param context        the context in which to execute this kernel
-     * @param integrator     the DrudeLangevinIntegrator this kernel is being used for
-     */
-    void execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
-    /**
-     * Compute the kinetic energy.
-     * 
-     * @param context     the context in which to execute this kernel
-     * @param integrator  the DrudeLangevinIntegrator this kernel is being used for
-     */
-    double computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
-private:
-    CudaContext& cu;
-    double prevStepSize;
-    CudaArray normalParticles;
-    CudaArray pairParticles;
-    CUfunction kernel1, kernel2, hardwallKernel;
-};
-
-/**
- * This kernel is invoked by DrudeSCFIntegrator to take one time step
- */
-class CudaIntegrateDrudeSCFStepKernel : public IntegrateDrudeSCFStepKernel {
-public:
-    CudaIntegrateDrudeSCFStepKernel(const std::string& name, const Platform& platform, CudaContext& cu) :
-            IntegrateDrudeSCFStepKernel(name, platform), cu(cu), minimizerPos(NULL) {
-    }
-    ~CudaIntegrateDrudeSCFStepKernel();
-    /**
-     * Initialize the kernel.
-     *
-     * @param system     the System this kernel will be applied to
-     * @param integrator the DrudeSCFIntegrator this kernel will be used for
-     * @param force      the DrudeForce to get particle parameters from
-     */
-    void initialize(const System& system, const DrudeSCFIntegrator& integrator, const DrudeForce& force);
-    /**
-     * Execute the kernel.
-     *
-     * @param context        the context in which to execute this kernel
-     * @param integrator     the DrudeSCFIntegrator this kernel is being used for
-     */
-    void execute(ContextImpl& context, const DrudeSCFIntegrator& integrator);
-    /**
-     * Compute the kinetic energy.
-     * 
-     * @param context     the context in which to execute this kernel
-     * @param integrator  the DrudeSCFIntegrator this kernel is being used for
-     */
-    double computeKineticEnergy(ContextImpl& context, const DrudeSCFIntegrator& integrator);
-private:
-    void minimize(ContextImpl& context, double tolerance);
-    CudaContext& cu;
-    double prevStepSize;
-    std::vector<int> drudeParticles;
-    lbfgsfloatval_t *minimizerPos;
-    lbfgs_parameter_t minimizerParams;
-    CUfunction kernel1, kernel2;
-};
-
-} // namespace OpenMM
-
-#endif /*CUDA_DRUDE_KERNELS_H_*/
--- a/plugins/drude/platforms/cuda/tests/CMakeLists.txt
+++ b/plugins/drude/platforms/cuda/tests/CMakeLists.txt
@@ -5,6 +5,7 @@
 ENABLE_TESTING()

 INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIR})
+INCLUDE_DIRECTORIES(${OPENMM_DIR}/plugins/drude/tests)

 # Automatically create tests using files named "Test*.cpp"
 FILE(GLOB TEST_PROGS "*Test*.cpp")

--- a/plugins/drude/platforms/cuda/tests/TestCudaDrudeNoseHoover.cpp
+++ b/plugins/drude/platforms/cuda/tests/TestCudaDrudeNoseHoover.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+//#include "ReferenceTests.h"
+
+#include "openmm/internal/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/Platform.h"
+#include "openmm/System.h"
+#include "openmm/VerletIntegrator.h"
+#include "openmm/DrudeForce.h"
+#include "CudaPlatform.h"
+#include "SimTKOpenMMUtilities.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+extern "C" OPENMM_EXPORT void registerDrudeCudaKernelFactories();
+
+// Intentionally empty: this file defines no CUDA-specific test cases.
+// NOTE(review): presumably invoked by the shared driver in TestDrudeNoseHoover.h (included below) - confirm.
+void runPlatformTests() { }
+
+#include "TestDrudeNoseHoover.h"
+
+/**
+ * Register the Drude CUDA kernel factories and return the CUDA platform.
+ *
+ * @param argc  number of command line arguments
+ * @param argv  command line arguments; when argv[1] is present it is used as
+ *              the default value of the platform's "Precision" property
+ * @return the Platform registered under the name "CUDA"
+ */
+Platform& initializePlatform(int argc, char* argv[]) {
+    registerDrudeCudaKernelFactories();
+    Platform& cudaPlatform = Platform::getPlatformByName("CUDA");
+    const bool precisionGiven = (argc > 1);
+    if (precisionGiven)
+        cudaPlatform.setPropertyDefaultValue("Precision", std::string(argv[1]));
+    return cudaPlatform;
+}
+
+
--- a/plugins/drude/platforms/opencl/CMakeLists.txt
+++ b/plugins/drude/platforms/opencl/CMakeLists.txt
@@ -12,7 +12,7 @@

 # The source is organized into subdirectories, but we handle them all from
 # this CMakeLists file rather than letting CMake visit them as SUBDIRS.
-SET(OPENMM_SOURCE_SUBDIRS .)
+SET(OPENMM_SOURCE_SUBDIRS . ../common)


 # Collect up information about the version of the OpenMM library we're building
@@ -59,32 +59,25 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
    INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
 ENDFOREACH(subdir)

+SET(COMMON_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/../common/src/CommonDrudeKernelSources.cpp)
+SET(SOURCE_FILES ${SOURCE_FILES} ${COMMON_KERNELS_CPP})
+
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/../common/src)
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/include)
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/src)
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/opencl/src)
-
-# Set variables needed for encoding kernel sources into a C++ class
-
-SET(CL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
-SET(CL_SOURCE_CLASS OpenCLDrudeKernelSources)
-SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.cpp)
-SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.h)
-SET(SOURCE_FILES ${SOURCE_FILES} ${CL_KERNELS_CPP} ${CL_KERNELS_H})
-INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/common/include)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/common/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/../common/src)

 # Create the library

 INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})

-FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
-ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H}
-    COMMAND ${CMAKE_COMMAND}
-    ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/opencl/EncodeCLFiles.cmake
-    DEPENDS ${OPENCL_KERNELS}
-)
-SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE)
+SET_SOURCE_FILES_PROPERTIES(${COMMON_KERNELS_CPP} PROPERTIES GENERATED TRUE)
 ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
+ADD_DEPENDENCIES(${SHARED_TARGET} DrudeCommonKernels)

 TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME}  ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
 TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME}OpenCL)

--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernelFactory.cpp
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernelFactory.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2019 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -27,7 +27,8 @@
 #include <exception>

 #include "OpenCLDrudeKernelFactory.h"
-#include "OpenCLDrudeKernels.h"
+#include "CommonDrudeKernels.h"
+#include "OpenCLContext.h"
 #include "openmm/internal/windowsExport.h"
 #include "openmm/internal/ContextImpl.h"
 #include "openmm/OpenMMException.h"
@@ -63,10 +64,10 @@ extern "C" OPENMM_EXPORT void registerDrudeOpenCLKernelFactories() {
 KernelImpl* OpenCLDrudeKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
    OpenCLContext& cl = *static_cast<OpenCLPlatform::PlatformData*>(context.getPlatformData())->contexts[0];
    if (name == CalcDrudeForceKernel::Name())
-        return new OpenCLCalcDrudeForceKernel(name, platform, cl);
+        return new CommonCalcDrudeForceKernel(name, platform, cl);
    if (name == IntegrateDrudeLangevinStepKernel::Name())
-        return new OpenCLIntegrateDrudeLangevinStepKernel(name, platform, cl);
+        return new CommonIntegrateDrudeLangevinStepKernel(name, platform, cl);
    if (name == IntegrateDrudeSCFStepKernel::Name())
-        return new OpenCLIntegrateDrudeSCFStepKernel(name, platform, cl);
+        return new CommonIntegrateDrudeSCFStepKernel(name, platform, cl);
    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
 }
--- a/plugins/drude/platforms/opencl/src/kernels/drudeLangevin.cl
+++ b/plugins/drude/platforms/opencl/src/kernels/drudeLangevin.cl
-/**
- * Perform the first step of Langevin integration.
- */
-
-__kernel void integrateDrudeLangevinPart1(__global mixed4* restrict velm, __global const real4* restrict force, __global mixed4* restrict posDelta,
-        __global const int* restrict normalParticles, __global const int2* restrict pairParticles, __global const mixed2* restrict dt, mixed vscale, mixed fscale,
-        mixed noisescale, mixed vscaleDrude, mixed fscaleDrude, mixed noisescaleDrude, __global const float4* restrict random, unsigned int randomIndex) {
-    mixed stepSize = dt[0].y;
-    
-    // Update normal particles.
-
-    for (int i = get_global_id(0); i < NUM_NORMAL_PARTICLES; i += get_global_size(0)) {
-        int index = normalParticles[i];
-        mixed4 velocity = velm[index];
-        if (velocity.w != 0) {
-            mixed sqrtInvMass = sqrt(velocity.w);
-            float4 rand = random[randomIndex+index];
-            real4 f = force[index];
-            velocity.x = vscale*velocity.x + fscale*velocity.w*f.x + noisescale*sqrtInvMass*rand.x;
-            velocity.y = vscale*velocity.y + fscale*velocity.w*f.y + noisescale*sqrtInvMass*rand.y;
-            velocity.z = vscale*velocity.z + fscale*velocity.w*f.z + noisescale*sqrtInvMass*rand.z;
-            velm[index] = velocity;
-            posDelta[index] = (mixed4) (stepSize*velocity.x, stepSize*velocity.y, stepSize*velocity.z, 0);
-        }
-    }
-    
-    // Update Drude particle pairs.
-    
-    randomIndex += NUM_NORMAL_PARTICLES;
-    for (int i = get_global_id(0); i < NUM_PAIRS; i += get_global_size(0)) {
-        int2 particles = pairParticles[i];
-        mixed4 velocity1 = velm[particles.x];
-        mixed4 velocity2 = velm[particles.y];
-        mixed mass1 = 1/velocity1.w;
-        mixed mass2 = 1/velocity2.w;
-        mixed invTotalMass = 1/(mass1+mass2);
-        mixed invReducedMass = (mass1+mass2)*velocity1.w*velocity2.w;
-        mixed mass1fract = invTotalMass*mass1;
-        mixed mass2fract = invTotalMass*mass2;
-        mixed sqrtInvTotalMass = sqrt(invTotalMass);
-        mixed sqrtInvReducedMass = sqrt(invReducedMass);
-        mixed4 cmVel = velocity1*mass1fract+velocity2*mass2fract;
-        mixed4 relVel = velocity2-velocity1;
-        mixed4 force1 = convert_mixed4(force[particles.x]);
-        mixed4 force2 = convert_mixed4(force[particles.y]);
-        mixed4 cmForce = force1+force2;
-        mixed4 relForce = force2*mass1fract - force1*mass2fract;
-        float4 rand1 = random[randomIndex+2*i];
-        float4 rand2 = random[randomIndex+2*i+1];
-        cmVel.x = vscale*cmVel.x + fscale*invTotalMass*cmForce.x + noisescale*sqrtInvTotalMass*rand1.x;
-        cmVel.y = vscale*cmVel.y + fscale*invTotalMass*cmForce.y + noisescale*sqrtInvTotalMass*rand1.y;
-        cmVel.z = vscale*cmVel.z + fscale*invTotalMass*cmForce.z + noisescale*sqrtInvTotalMass*rand1.z;
-        relVel.x = vscaleDrude*relVel.x + fscaleDrude*invReducedMass*relForce.x + noisescaleDrude*sqrtInvReducedMass*rand2.x;
-        relVel.y = vscaleDrude*relVel.y + fscaleDrude*invReducedMass*relForce.y + noisescaleDrude*sqrtInvReducedMass*rand2.y;
-        relVel.z = vscaleDrude*relVel.z + fscaleDrude*invReducedMass*relForce.z + noisescaleDrude*sqrtInvReducedMass*rand2.z;
-        velocity1.xyz = cmVel.xyz-relVel.xyz*mass2fract;
-        velocity2.xyz = cmVel.xyz+relVel.xyz*mass1fract;
-        velm[particles.x] = velocity1;
-        velm[particles.y] = velocity2;
-        posDelta[particles.x] = (mixed4) (stepSize*velocity1.x, stepSize*velocity1.y, stepSize*velocity1.z, 0);
-        posDelta[particles.y] = (mixed4) (stepSize*velocity2.x, stepSize*velocity2.y, stepSize*velocity2.z, 0);
-    }
-}
-
-/**
- * Perform the second step of Langevin integration.
- */
-
-__kernel void integrateDrudeLangevinPart2(__global real4* restrict posq, __global real4* restrict posqCorrection, __global const mixed4* restrict posDelta, __global mixed4* restrict velm, __global const mixed2* restrict dt) {
-#ifdef SUPPORTS_DOUBLE_PRECISION
-    double invStepSize = 1.0/dt[0].y;
-#else
-    float invStepSize = 1.0f/dt[0].y;
-#endif
-    int index = get_global_id(0);
-    while (index < NUM_ATOMS) {
-        mixed4 vel = velm[index];
-        if (vel.w != 0.0) {
-#ifdef USE_MIXED_PRECISION
-            real4 pos1 = posq[index];
-            real4 pos2 = posqCorrection[index];
-            mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-#else
-            real4 pos = posq[index];
-#endif
-            mixed4 delta = posDelta[index];
-            pos.xyz += delta.xyz;
-#ifdef SUPPORTS_DOUBLE_PRECISION
-            vel.xyz = convert_mixed4(invStepSize*convert_double4(delta)).xyz;
-#else
-            vel.xyz = invStepSize*delta.xyz;
-#endif
-#ifdef USE_MIXED_PRECISION
-            posq[index] = convert_real4(pos);
-            posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
-#else
-            posq[index] = pos;
-#endif
-            velm[index] = vel;
-        }
-        index += get_global_size(0);
-    }
-}
-
-/**
- * Apply hard wall constraints
- */
-__kernel void applyHardWallConstraints(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict velm,
-        __global const int2* restrict pairParticles, __global const mixed2* restrict dt, mixed maxDrudeDistance, mixed hardwallscaleDrude) {
-    mixed stepSize = dt[0].y;
-    for (int i = get_global_id(0); i < NUM_PAIRS; i += get_global_size(0)) {
-        int2 particles = pairParticles[i];
-#ifdef USE_MIXED_PRECISION
-        real4 posReal1 = posq[particles.x];
-        real4 posReal2 = posq[particles.y];
-        real4 posCorr1 = posqCorrection[particles.x];
-        real4 posCorr2 = posqCorrection[particles.y];
-        mixed4 pos1 = (mixed4) (posReal1.x+(mixed)posCorr1.x, posReal1.y+(mixed)posCorr1.y, posReal1.z+(mixed)posCorr1.z, posReal1.w);
-        mixed4 pos2 = (mixed4) (posReal2.x+(mixed)posCorr2.x, posReal2.y+(mixed)posCorr2.y, posReal2.z+(mixed)posCorr2.z, posReal2.w);
-#else
-        mixed4 pos1 = posq[particles.x];
-        mixed4 pos2 = posq[particles.y];
-#endif
-        mixed4 delta = pos1-pos2;
-        mixed r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
-        mixed rInv = 1/r;
-        if (rInv*maxDrudeDistance < 1) {
-            // The constraint has been violated, so make the inter-particle distance "bounce"
-            // off the hard wall.
-
-            mixed4 bondDir = delta*rInv;
-            mixed4 vel1 = velm[particles.x];
-            mixed4 vel2 = velm[particles.y];
-            mixed mass1 = 1/vel1.w;
-            mixed mass2 = 1/vel2.w;
-            mixed deltaR = r-maxDrudeDistance;
-            mixed deltaT = stepSize;
-            mixed dotvr1 = vel1.x*bondDir.x + vel1.y*bondDir.y + vel1.z*bondDir.z;
-            mixed4 vb1 = bondDir*dotvr1;
-            mixed4 vp1 = vel1-vb1;
-            if (vel2.w == 0) {
-                // The parent particle is massless, so move only the Drude particle.
-
-                if (dotvr1 != 0)
-                    deltaT = deltaR/fabs(dotvr1);
-                if (deltaT > stepSize)
-                    deltaT = stepSize;
-                dotvr1 = -dotvr1*hardwallscaleDrude/(fabs(dotvr1)*sqrt(mass1));
-                mixed dr = -deltaR + deltaT*dotvr1;
-                pos1.xyz += bondDir.xyz*dr;
-#ifdef USE_MIXED_PRECISION
-                posq[particles.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
-                posqCorrection[particles.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
-#else
-                posq[particles.x] = pos1;
-#endif
-                vel1.xyz = vp1.xyz + bondDir.xyz*dotvr1;
-                velm[particles.x] = vel1;
-            }
-            else {
-                // Move both particles.
-
-                mixed invTotalMass = 1/(mass1+mass2);
-                mixed dotvr2 = vel2.x*bondDir.x + vel2.y*bondDir.y + vel2.z*bondDir.z;
-                mixed4 vb2 = bondDir*dotvr2;
-                mixed4 vp2 = vel2-vb2;
-                mixed vbCMass = (mass1*dotvr1 + mass2*dotvr2)*invTotalMass;
-                dotvr1 -= vbCMass;
-                dotvr2 -= vbCMass;
-                if (dotvr1 != dotvr2)
-                    deltaT = deltaR/fabs(dotvr1-dotvr2);
-                if (deltaT > stepSize)
-                    deltaT = stepSize;
-                mixed vBond = hardwallscaleDrude/sqrt(mass1);
-                dotvr1 = -dotvr1*vBond*mass2*invTotalMass/fabs(dotvr1);
-                dotvr2 = -dotvr2*vBond*mass1*invTotalMass/fabs(dotvr2);
-                mixed dr1 = -deltaR*mass2*invTotalMass + deltaT*dotvr1;
-                mixed dr2 = deltaR*mass1*invTotalMass + deltaT*dotvr2;
-                dotvr1 += vbCMass;
-                dotvr2 += vbCMass;
-                pos1.xyz += bondDir.xyz*dr1;
-                pos2.xyz += bondDir.xyz*dr2;
-#ifdef USE_MIXED_PRECISION
-                posq[particles.x] = (real4) ((real) pos1.x, (real) pos1.y, (real) pos1.z, (real) pos1.w);
-                posq[particles.y] = (real4) ((real) pos2.x, (real) pos2.y, (real) pos2.z, (real) pos2.w);
-                posqCorrection[particles.x] = (real4) (pos1.x-(real) pos1.x, pos1.y-(real) pos1.y, pos1.z-(real) pos1.z, 0);
-                posqCorrection[particles.y] = (real4) (pos2.x-(real) pos2.x, pos2.y-(real) pos2.y, pos2.z-(real) pos2.z, 0);
-#else
-                posq[particles.x] = pos1;
-                posq[particles.y] = pos2;
-#endif
-                vel1.xyz = vp1.xyz + bondDir.xyz*dotvr1;
-                vel2.xyz = vp2.xyz + bondDir.xyz*dotvr2;
-                velm[particles.x] = vel1;
-                velm[particles.y] = vel2;
-            }
-        }
-    }
-}
--- a/plugins/drude/platforms/opencl/src/kernels/drudePairForce.cl
+++ b/plugins/drude/platforms/opencl/src/kernels/drudePairForce.cl
-float2 drudeParams = PARAMS[index];
-real4 force1 = 0;
-real4 force2 = 0;
-real4 force3 = 0;
-real4 force4 = 0;
-
-// First pair.
-
-real4 delta = (real4) (pos1.xyz-pos3.xyz, 0);
-real rInv = RSQRT(dot(delta, delta));
-real r = RECIP(rInv);
-real u = drudeParams.x*r;
-real screening = 1-(1+0.5f*u)*EXP(-u);
-real pairEnergy = drudeParams.y*screening*rInv;
-energy += pairEnergy;
-real4 f = delta*(drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
-force1 += f;
-force3 -= f;
-
-// Second pair.
-
-delta = (real4) (pos1.xyz-pos4.xyz, 0);
-rInv = RSQRT(dot(delta, delta));
-r = RECIP(rInv);
-u = drudeParams.x*r;
-screening = 1-(1+0.5f*u)*EXP(-u);
-pairEnergy = -drudeParams.y*screening*rInv;
-energy += pairEnergy;
-f = delta*(-drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
-force1 += f;
-force4 -= f;
-
-// Third pair.
-
-delta = (real4) (pos2.xyz-pos3.xyz, 0);
-rInv = RSQRT(dot(delta, delta));
-r = RECIP(rInv);
-u = drudeParams.x*r;
-screening = 1-(1+0.5f*u)*EXP(-u);
-pairEnergy = -drudeParams.y*screening*rInv;
-energy += pairEnergy;
-f = delta*(-drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
-force2 += f;
-force3 -= f;
-
-// Fourth pair.
-
-delta = (real4) (pos2.xyz-pos4.xyz, 0);
-rInv = RSQRT(dot(delta, delta));
-r = RECIP(rInv);
-u = drudeParams.x*r;
-screening = 1-(1+0.5f*u)*EXP(-u);
-pairEnergy = drudeParams.y*screening*rInv;
-energy += pairEnergy;
-f = delta*(drudeParams.y*rInv*rInv)*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x);
-force2 += f;
-force4 -= f;
--- a/plugins/drude/platforms/opencl/src/kernels/drudeParticleForce.cl
+++ b/plugins/drude/platforms/opencl/src/kernels/drudeParticleForce.cl
-real4 delta = (real4) (pos1.xyz-pos2.xyz, 0);
-real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
-float4 drudeParams = PARAMS[index];
-float k1 = drudeParams.x;
-float k2 = drudeParams.y;
-float k3 = drudeParams.z;
-
-// Compute the isotropic force.
-
-energy += 0.5f*k3*r2;
-real4 force1 = -delta*k3;
-real4 force2 = delta*k3;
-real4 force3 = 0;
-real4 force4 = 0;
-real4 force5 = 0;
-
-// Compute the first anisotropic force.
-
-if (k1 != 0) {
-    real4 dir = (real4) (pos2.xyz-pos3.xyz, 0);
-    real invDist = RSQRT(dot(dir, dir));
-    dir *= invDist;
-    real rprime = dot(dir, delta);
-    energy += 0.5f*k1*rprime*rprime;
-    real4 f1 = dir*(k1*rprime); 
-    real4 f2 = (delta-dir*rprime)*(k1*rprime*invDist);
-    force1 -= f1;
-    force2 += f1-f2;
-    force3 += f2;
-}
-
-// Compute the second anisotropic force.
-
-if (k2 != 0) {
-    real4 dir = (real4) (pos4.xyz-pos5.xyz, 0);
-    real invDist = RSQRT(dot(dir, dir));
-    dir *= invDist;
-    real rprime = dot(dir, delta);
-    energy += 0.5f*k2*rprime*rprime;
-    real4 f1 = dir*(k2*rprime);
-    real4 f2 = (delta-dir*rprime)*(k2*rprime*invDist);
-    force1 -= f1;
-    force2 += f1;
-    force4 -= f2;
-    force5 += f2;
-}
--- a/plugins/drude/platforms/opencl/tests/CMakeLists.txt
+++ b/plugins/drude/platforms/opencl/tests/CMakeLists.txt
@@ -5,6 +5,7 @@
 ENABLE_TESTING()

 INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
+INCLUDE_DIRECTORIES(${OPENMM_DIR}/plugins/drude/tests)

 # Automatically create tests using files named "Test*.cpp"
 FILE(GLOB TEST_PROGS "*Test*.cpp")

--- a/plugins/drude/platforms/opencl/tests/TestOpenCLDrudeNoseHoover.cpp
+++ b/plugins/drude/platforms/opencl/tests/TestOpenCLDrudeNoseHoover.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+//#include "ReferenceTests.h"
+
+#include "openmm/internal/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/Platform.h"
+#include "openmm/System.h"
+#include "openmm/VerletIntegrator.h"
+#include "openmm/DrudeForce.h"
+#include "OpenCLPlatform.h"
+#include "SimTKOpenMMUtilities.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+extern "C" OPENMM_EXPORT void registerDrudeOpenCLKernelFactories();
+
+// Intentionally empty: this file adds no OpenCL-specific test cases.
+// NOTE(review): presumably called by the shared driver in TestDrudeNoseHoover.h
+// (included just below) -- confirm against that header.
+void runPlatformTests() {
+}
+
+#include "TestDrudeNoseHoover.h"
+
+/**
+ * Register the Drude OpenCL kernel factories and return the "OpenCL" platform.
+ *
+ * @param argc  number of command line arguments
+ * @param argv  command line arguments; when argv[1] is present it is installed
+ *              as the default value of the platform's "Precision" property
+ * @return a reference to the Platform registered under the name "OpenCL"
+ */
+Platform& initializePlatform(int argc, char* argv[]) {
+    registerDrudeOpenCLKernelFactories();
+    Platform& platform = Platform::getPlatformByName("OpenCL");
+    if (argc > 1) {
+        // The first user-supplied argument selects the default precision.
+        const string precision(argv[1]);
+        platform.setPropertyDefaultValue("Precision", precision);
+    }
+    return platform;
+}
+
--- a/plugins/drude/platforms/reference/tests/CMakeLists.txt
+++ b/plugins/drude/platforms/reference/tests/CMakeLists.txt
@@ -5,6 +5,7 @@ ENABLE_TESTING()
 INCLUDE_DIRECTORIES(${OPENMM_DIR}/platforms/reference/include)
 INCLUDE_DIRECTORIES(${OPENMM_DIR}/openmmapi/include/openmm)
 INCLUDE_DIRECTORIES(${OPENMM_DIR}/platforms/reference/src)
+INCLUDE_DIRECTORIES(${OPENMM_DIR}/plugins/drude/tests)

 SET(SHARED_OPENMM_DRUDE_TARGET OpenMMDrude)