Created CUDA implementation of DrudeForce and DrudeLangevinIntegrator

09a8475d · Peter Eastman · c7711f74 · 09a8475d · 09a8475d · 09a8475d
Commit 09a8475d authored May 15, 2013 by Peter Eastman
17 changed files
--- a/plugins/drude/platforms/cuda/CMakeLists.txt
+++ b/plugins/drude/platforms/cuda/CMakeLists.txt
+#---------------------------------------------------
+# OpenMM CUDA Drude Integrator
+#
+# Creates OpenMM library, base name=OpenMMDrudeCUDA.
+# Default libraries are shared & optimized. Variants
+# are created for debug (_d).
+#
+# Windows:
+#   OpenMMDrudeCUDA[_d].dll
+#   OpenMMDrudeCUDA[_d].lib
+# Unix:
+#   libOpenMMDrudeCUDA[_d].so
+#----------------------------------------------------
+
+IF (APPLE)
+    SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
+    SET (CMAKE_OSX_SYSROOT "/Developer/SDKs/MacOSX10.6.sdk")
+ENDIF (APPLE)
+
+# The source is organized into subdirectories, but we handle them all from
+# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
+SET(OPENMM_SOURCE_SUBDIRS .)
+
+
+# Collect up information about the version of the OpenMM library we're building
+# and make it available to the code so it can be built into the binaries.
+
+SET(OPENMMDRUDECUDA_LIBRARY_NAME OpenMMDrudeCUDA)
+
+SET(SHARED_TARGET ${OPENMMDRUDECUDA_LIBRARY_NAME})
+
+
+# Ensure that debug libraries have "_d" appended to their names.
+# CMake gets this right on Windows automatically with this definition.
+IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
+    SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
+ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
+
+# But on Unix or Cygwin we have to add the suffix manually
+IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+    SET(SHARED_TARGET ${SHARED_TARGET}_d)
+ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+
+
+# These are all the places to search for header files which are
+# to be part of the API.
+SET(API_INCLUDE_DIRS) # start empty
+FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
+    # append
+    SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
+                         ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
+                         ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
+ENDFOREACH(subdir)
+
+# We'll need both *relative* path names, starting with their API_INCLUDE_DIRS,
+# and absolute pathnames.
+SET(API_REL_INCLUDE_FILES)   # start these out empty
+SET(API_ABS_INCLUDE_FILES)
+
+FOREACH(dir ${API_INCLUDE_DIRS})
+    FILE(GLOB fullpaths ${dir}/*.h)	# returns full pathnames
+    SET(API_ABS_INCLUDE_FILES ${API_ABS_INCLUDE_FILES} ${fullpaths})
+
+    FOREACH(pathname ${fullpaths})
+        GET_FILENAME_COMPONENT(filename ${pathname} NAME)
+        SET(API_REL_INCLUDE_FILES ${API_REL_INCLUDE_FILES} ${dir}/${filename})
+    ENDFOREACH(pathname)
+ENDFOREACH(dir)
+
+# collect up source files
+SET(SOURCE_FILES) # empty
+SET(SOURCE_INCLUDE_FILES)
+
+FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
+    FILE(GLOB_RECURSE src_files  ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
+    FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
+    SET(SOURCE_FILES         ${SOURCE_FILES}         ${src_files})   #append
+    SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
+    INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
+ENDFOREACH(subdir)
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/include)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/cuda/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_BINARY_DIR}/platforms/cuda/src)
+
+# Set variables needed for encoding kernel sources into a C++ class
+
+SET(CUDA_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
+SET(CUDA_SOURCE_CLASS CudaDrudeKernelSources)
+SET(CUDA_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.cpp)
+SET(CUDA_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CUDA_SOURCE_CLASS}.h)
+SET(SOURCE_FILES ${SOURCE_FILES} ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H})
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
+
+# Create the library
+
+INCLUDE_DIRECTORIES(${CUDA_TOOLKIT_INCLUDE})
+
+FILE(GLOB CUDA_KERNELS ${CUDA_SOURCE_DIR}/kernels/*.cu)
+ADD_CUSTOM_COMMAND(OUTPUT ${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H}
+    COMMAND ${CMAKE_COMMAND}
+    ARGS -D CUDA_SOURCE_DIR=${CUDA_SOURCE_DIR} -D CUDA_KERNELS_CPP=${CUDA_KERNELS_CPP} -D CUDA_KERNELS_H=${CUDA_KERNELS_H} -D CUDA_SOURCE_CLASS=${CUDA_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/cuda/EncodeCUDAFiles.cmake
+    DEPENDS ${CUDA_KERNELS}
+)
+SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GENERATED TRUE)
+ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
+
+IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+    SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
+ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+    SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
+ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB}  ${CUDA_LIBRARIES} ${PTHREADS_LIB})
+TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${OPENMM_LIBRARY_NAME}CUDA_d optimized ${OPENMM_LIBRARY_NAME}CUDA)
+TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${SHARED_DRUDE_TARGET} optimized ${SHARED_DRUDE_TARGET})
+SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")
+
+INSTALL(TARGETS ${SHARED_TARGET} DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/plugins)
+# Ensure that links to the main CUDA library will be resolved.
+IF (APPLE)
+    IF (CMAKE_BUILD_TYPE MATCHES Debug)
+        SET(CUDA_LIBRARY libOpenMMCUDA_d.dylib)
+    ELSE (CMAKE_BUILD_TYPE MATCHES Debug)
+        SET(CUDA_LIBRARY libOpenMMCUDA.dylib)
+    ENDIF (CMAKE_BUILD_TYPE MATCHES Debug)
+    INSTALL(CODE "EXECUTE_PROCESS(COMMAND install_name_tool -change ${CUDA_LIBRARY} @loader_path/${CUDA_LIBRARY} ${CMAKE_INSTALL_PREFIX}/lib/plugins/lib${SHARED_TARGET}.dylib)")
+ENDIF (APPLE)
+if(OPENMM_BUILD_CUDA_TESTS)
+    SUBDIRS (tests)
+endif(OPENMM_BUILD_CUDA_TESTS)
--- a/plugins/drude/platforms/cuda/include/CudaDrudeKernelFactory.h
+++ b/plugins/drude/platforms/cuda/include/CudaDrudeKernelFactory.h
+#ifndef OPENMM_CUDADRUDEKERNELFACTORY_H_
+#define OPENMM_CUDADRUDEKERNELFACTORY_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/KernelFactory.h"
+
+namespace OpenMM {
+
+/**
+ * This KernelFactory creates kernels for the CUDA implementation of the Drude plugin.
+ */
+
+class CudaDrudeKernelFactory : public KernelFactory {
+public:
+    KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_CUDADRUDEKERNELFACTORY_H_*/
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernelFactory.cpp
+/* -------------------------------------------------------------------------- *
+ *                              OpenMMAmoeba                                  *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2011-2012 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include <exception>
+
+#include "CudaDrudeKernelFactory.h"
+#include "CudaDrudeKernels.h"
+#include "openmm/internal/windowsExport.h"
+#include "openmm/internal/ContextImpl.h"
+#include "openmm/OpenMMException.h"
+
+using namespace OpenMM;
+
+extern "C" void registerPlatforms() {
+}
+
+extern "C" void registerKernelFactories() {
+    try {
+        Platform& platform = Platform::getPlatformByName("CUDA");
+        CudaDrudeKernelFactory* factory = new CudaDrudeKernelFactory();
+        platform.registerKernelFactory(CalcDrudeForceKernel::Name(), factory);
+        platform.registerKernelFactory(IntegrateDrudeLangevinStepKernel::Name(), factory);
+    }
+    catch (std::exception ex) {
+        // Ignore
+    }
+}
+
+extern "C" OPENMM_EXPORT void registerDrudeCudaKernelFactories() {
+    try {
+        Platform::getPlatformByName("CUDA");
+    }
+    catch (...) {
+        Platform::registerPlatform(new CudaPlatform());
+    }
+    registerKernelFactories();
+}
+
+KernelImpl* CudaDrudeKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
+    CudaContext& cu = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData())->contexts[0];
+    if (name == CalcDrudeForceKernel::Name())
+        return new CudaCalcDrudeForceKernel(name, platform, cu);
+    if (name == IntegrateDrudeLangevinStepKernel::Name())
+        return new CudaIntegrateDrudeLangevinStepKernel(name, platform, cu);
+    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
+}
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernelSources.cpp.in
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernelSources.cpp.in
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "CudaDrudeKernelSources.h"
+
+using namespace OpenMM;
+using namespace std;
+
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernelSources.h.in
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernelSources.h.in
+#ifndef OPENMM_CUDADRUDEKERNELSOURCES_H_
+#define OPENMM_CUDADRUDEKERNELSOURCES_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include <string>
+
+namespace OpenMM {
+
+/**
+ * This class is a central holding place for the source code of CUDA kernels.
+ * The CMake build script inserts declarations into it based on the .cu files in the
+ * kernels subfolder.
+ */
+
+class CudaDrudeKernelSources {
+public:
+@CUDA_FILE_DECLARATIONS@
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_CUDADRUDEKERNELSOURCES_H_*/
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "CudaDrudeKernels.h"
+#include "CudaDrudeKernelSources.h"
+#include "openmm/internal/ContextImpl.h"
+#include "CudaBondedUtilities.h"
+#include "CudaForceInfo.h"
+#include "CudaIntegrationUtilities.h"
+#include "CudaKernelSources.h"
+#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+#include <set>
+
+using namespace OpenMM;
+using namespace std;
+
+class CudaDrudeForceInfo : public CudaForceInfo {
+public:
+    CudaDrudeForceInfo(const DrudeForce& force) : force(force) {
+    }
+    int getNumParticleGroups() {
+        return force.getNumParticles()+force.getNumScreenedPairs();
+    }
+    void getParticlesInGroup(int index, vector<int>& particles) {
+        particles.clear();
+        if (index < force.getNumParticles()) {
+            int p, p1, p2, p3, p4;
+            double charge, polarizability, aniso12, aniso34;
+            force.getParticleParameters(index, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
+            particles.push_back(p);
+            particles.push_back(p1);
+            if (p2 != -1)
+                particles.push_back(p2);
+            if (p3 != -1)
+                particles.push_back(p3);
+            if (p4 != -1)
+                particles.push_back(p4);
+        }
+        else {
+            int drude1, drude2;
+            double thole;
+            force.getScreenedPairParameters(index-force.getNumParticles(), drude1, drude2, thole);
+            int p, p1, p2, p3, p4;
+            double charge, polarizability, aniso12, aniso34;
+            force.getParticleParameters(drude1, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
+            particles.push_back(p);
+            particles.push_back(p1);
+            force.getParticleParameters(drude2, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
+            particles.push_back(p);
+            particles.push_back(p1);
+        }
+    }
+    bool areGroupsIdentical(int group1, int group2) {
+        if (group1 < force.getNumParticles() && group2 < force.getNumParticles()) {
+            int p, p1, p2, p3, p4;
+            double charge1, polarizability1, aniso12_1, aniso34_1;
+            double charge2, polarizability2, aniso12_2, aniso34_2;
+            force.getParticleParameters(group1, p, p1, p2, p3, p4, charge1, polarizability1, aniso12_1, aniso34_1);
+            force.getParticleParameters(group2, p, p1, p2, p3, p4, charge2, polarizability2, aniso12_2, aniso34_2);
+            return (charge1 == charge2 && polarizability1 == polarizability2 && aniso12_1 == aniso12_2 && aniso34_1 == aniso34_2);
+        }
+        if (group1 >= force.getNumParticles() && group2 >= force.getNumParticles()) {
+            int drude1, drude2;
+            double thole1, thole2;
+            force.getScreenedPairParameters(group1-force.getNumParticles(), drude1, drude2, thole1);
+            force.getScreenedPairParameters(group1-force.getNumParticles(), drude1, drude2, thole2);
+            return (thole1 == thole2);
+        }
+        return false;
+    }
+private:
+    const DrudeForce& force;
+};
+
+CudaCalcDrudeForceKernel::~CudaCalcDrudeForceKernel() {
+    cu.setAsCurrent();
+    if (particleParams != NULL)
+        delete particleParams;
+    if (pairParams != NULL)
+        delete pairParams;
+}
+#include <iostream>
+void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
+    cu.setAsCurrent();
+    int numContexts = cu.getPlatformData().contexts.size();
+    int startParticleIndex = cu.getContextIndex()*force.getNumParticles()/numContexts;
+    int endParticleIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts;
+    int numParticles = endParticleIndex-startParticleIndex;
+    if (numParticles > 0) {
+        // Create the harmonic interaction .
+        
+        vector<vector<int> > atoms(numParticles, vector<int>(5));
+        particleParams = CudaArray::create<float4>(cu, numParticles, "drudeParticleParams");
+        vector<float4> paramVector(numParticles);
+        for (int i = 0; i < numParticles; i++) {
+            double charge, polarizability, aniso12, aniso34;
+            force.getParticleParameters(startParticleIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], charge, polarizability, aniso12, aniso34);
+            double a1 = (atoms[i][2] == -1 ? 1 : aniso12);
+            double a2 = (atoms[i][3] == -1 || atoms[i][4] == -1 ? 1 : aniso34);
+            double a3 = 3-a1-a2;
+            double k3 = charge*charge/(polarizability*a3);
+            double k1 = charge*charge/(polarizability*a1) - k3;
+            double k2 = charge*charge/(polarizability*a2) - k3;
+            if (atoms[i][2] == -1) {
+                atoms[i][2] = 0;
+                k1 = 0;
+            }
+            if (atoms[i][3] == -1 || atoms[i][4] == -1) {
+                atoms[i][3] = 0;
+                atoms[i][4] = 0;
+                k2 = 0;
+            }
+            paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
+        }
+        particleParams->upload(paramVector);
+        map<string, string> replacements;
+        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams->getDevicePointer(), "float4");
+        cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
+    }
+    int startPairIndex = cu.getContextIndex()*force.getNumScreenedPairs()/numContexts;
+    int endPairIndex = (cu.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts;
+    int numPairs = endPairIndex-startPairIndex;
+    if (numPairs > 0) {
+        // Create the screened interaction between dipole pairs.
+        
+        vector<vector<int> > atoms(numPairs, vector<int>(4));
+        pairParams = CudaArray::create<float2>(cu, numPairs, "drudePairParams");
+        vector<float2> paramVector(numPairs);
+        for (int i = 0; i < numPairs; i++) {
+            int drude1, drude2;
+            double thole;
+            force.getScreenedPairParameters(startPairIndex+i, drude1, drude2, thole);
+            int p2, p3, p4;
+            double charge1, charge2, polarizability1, polarizability2, aniso12, aniso34;
+            force.getParticleParameters(drude1, atoms[i][0], atoms[i][1], p2, p3, p4, charge1, polarizability1, aniso12, aniso34);
+            force.getParticleParameters(drude2, atoms[i][2], atoms[i][3], p2, p3, p4, charge2, polarizability2, aniso12, aniso34);
+            double screeningScale = thole/pow(polarizability1*polarizability2, 1.0/6.0);
+            double energyScale = ONE_4PI_EPS0*charge1*charge2;
+            paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
+        }
+        pairParams->upload(paramVector);
+        map<string, string> replacements;
+        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams->getDevicePointer(), "float2");
+        cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
+    }
+    cu.addForce(new CudaDrudeForceInfo(force));
+}
+
+double CudaCalcDrudeForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
+    return 0.0;
+}
+
+void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, const DrudeForce& force) {
+    
+}
+
+CudaIntegrateDrudeLangevinStepKernel::~CudaIntegrateDrudeLangevinStepKernel() {
+    if (normalParticles != NULL)
+        delete normalParticles;
+    if (pairParticles != NULL)
+        delete pairParticles;
+}
+
+void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
+    cu.getPlatformData().initializeContexts(system);
+    cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
+    
+    // Identify particle pairs and ordinary particles.
+    
+    set<int> particles;
+    vector<int> normalParticleVec;
+    vector<int2> pairParticleVec;
+    for (int i = 0; i < system.getNumParticles(); i++)
+        particles.insert(i);
+    for (int i = 0; i < force.getNumParticles(); i++) {
+        int p, p1, p2, p3, p4;
+        double charge, polarizability, aniso12, aniso34;
+        force.getParticleParameters(i, p, p1, p2, p3, p4, charge, polarizability, aniso12, aniso34);
+        particles.erase(p);
+        particles.erase(p1);
+        pairParticleVec.push_back(make_int2(p, p1));
+    }
+    normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
+    normalParticles = CudaArray::create<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
+    pairParticles = CudaArray::create<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles");
+    if (normalParticleVec.size() > 0)
+        normalParticles->upload(normalParticleVec);
+    if (pairParticleVec.size() > 0)
+        pairParticles->upload(pairParticleVec);
+
+    // Create kernels.
+    
+    map<string, string> defines;
+    defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
+    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
+    defines["NUM_NORMAL_PARTICLES"] = cu.intToString(normalParticleVec.size());
+    defines["NUM_PAIRS"] = cu.intToString(pairParticleVec.size());
+    map<string, string> replacements;
+    CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaDrudeKernelSources::drudeLangevin, defines, "");
+    kernel1 = cu.getKernel(module, "integrateDrudeLangevinPart1");
+    kernel2 = cu.getKernel(module, "integrateDrudeLangevinPart2");
+    prevStepSize = -1.0;
+}
+
+void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
+    cu.setAsCurrent();
+    CudaIntegrationUtilities& integration = cu.getIntegrationUtilities();
+    int numAtoms = cu.getNumAtoms();
+    
+    // Compute integrator coefficients.
+    
+    double stepSize = integrator.getStepSize();
+    double vscale = exp(-stepSize*integrator.getFriction());
+    double fscale = (1-vscale)/integrator.getFriction()/(double) 0x100000000;
+    double noisescale = sqrt(2*BOLTZ*integrator.getTemperature()*integrator.getFriction())*sqrt(0.5*(1-vscale*vscale)/integrator.getFriction());
+    double vscaleDrude = exp(-stepSize*integrator.getDrudeFriction());
+    double fscaleDrude = (1-vscaleDrude)/integrator.getDrudeFriction()/(double) 0x100000000;
+    double noisescaleDrude = sqrt(2*BOLTZ*integrator.getDrudeTemperature()*integrator.getDrudeFriction())*sqrt(0.5*(1-vscaleDrude*vscaleDrude)/integrator.getDrudeFriction());
+    if (stepSize != prevStepSize) {
+        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
+            double2 ss = make_double2(0, stepSize);
+            integration.getStepSize().upload(&ss);
+        }
+        else {
+            float2 ss = make_float2(0, (float) stepSize);
+            integration.getStepSize().upload(&ss);
+        }
+        prevStepSize = stepSize;
+    }
+    
+    // Create appropriate pointer for the precision mode.
+    
+    float vscaleFloat = (float) vscale;
+    float fscaleFloat = (float) fscale;
+    float noisescaleFloat = (float) noisescale;
+    float vscaleDrudeFloat = (float) vscaleDrude;
+    float fscaleDrudeFloat = (float) fscaleDrude;
+    float noisescaleDrudeFloat = (float) noisescaleDrude;
+    void *vscalePtr, *fscalePtr, *noisescalePtr, *vscaleDrudePtr, *fscaleDrudePtr, *noisescaleDrudePtr;
+    if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
+        vscalePtr = &vscale;
+        fscalePtr = &fscale;
+        noisescalePtr = &noisescale;
+        vscaleDrudePtr = &vscaleDrude;
+        fscaleDrudePtr = &fscaleDrude;
+        noisescaleDrudePtr = &noisescaleDrude;
+    }
+    else {
+        vscalePtr = &vscaleFloat;
+        fscalePtr = &fscaleFloat;
+        noisescalePtr = &noisescaleFloat;
+        vscaleDrudePtr = &vscaleDrudeFloat;
+        fscaleDrudePtr = &fscaleDrudeFloat;
+        noisescaleDrudePtr = &noisescaleDrudeFloat;
+    }
+
+    // Call the first integration kernel.
+
+    int randomIndex = integration.prepareRandomNumbers(normalParticles->getSize()+2*pairParticles->getSize());
+    void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
+            &normalParticles->getDevicePointer(), &pairParticles->getDevicePointer(), &integration.getStepSize().getDevicePointer(),
+            vscalePtr, fscalePtr, noisescalePtr, vscaleDrudePtr, fscaleDrudePtr, noisescaleDrudePtr, &integration.getRandom().getDevicePointer(), &randomIndex};
+    cu.executeKernel(kernel1, args1, numAtoms);
+
+    // Apply constraints.
+
+    integration.applyConstraints(integrator.getConstraintTolerance());
+
+    // Call the second integration kernel.
+
+    CUdeviceptr posCorrection = (cu.getUseMixedPrecision() ? cu.getPosqCorrection().getDevicePointer() : 0);
+    void* args2[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &integration.getPosDelta().getDevicePointer(),
+            &cu.getVelm().getDevicePointer(), &integration.getStepSize().getDevicePointer()};
+    cu.executeKernel(kernel2, args2, numAtoms);
+    integration.computeVirtualSites();
+
+    // Update the time and step count.
+
+    cu.setTime(cu.getTime()+stepSize);
+    cu.setStepCount(cu.getStepCount()+1);
+}
+
+double CudaIntegrateDrudeLangevinStepKernel::computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator) {
+    return cu.getIntegrationUtilities().computeKineticEnergy(0.5*integrator.getStepSize());
+}
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
+#ifndef CUDA_DRUDE_KERNELS_H_
+#define CUDA_DRUDE_KERNELS_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/DrudeKernels.h"
+#include "CudaContext.h"
+#include "CudaArray.h"
+
+namespace OpenMM {
+
+/**
+ * This kernel is invoked by DrudeForce to calculate the forces acting on the system and the energy of the system.
+ */
+class CudaCalcDrudeForceKernel : public CalcDrudeForceKernel {
+public:
+    CudaCalcDrudeForceKernel(std::string name, const Platform& platform, CudaContext& cu) :
+            CalcDrudeForceKernel(name, platform), cu(cu), particleParams(NULL), pairParams(NULL) {
+    }
+    ~CudaCalcDrudeForceKernel();
+    /**
+     * Initialize the kernel.
+     * 
+     * @param system     the System this kernel will be applied to
+     * @param force      the DrudeForce this kernel will be used for
+     */
+    void initialize(const System& system, const DrudeForce& force);
+    /**
+     * Execute the kernel to calculate the forces and/or energy.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param includeForces  true if forces should be calculated
+     * @param includeEnergy  true if the energy should be calculated
+     * @return the potential energy due to the force
+     */
+    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
+    /**
+     * Copy changed parameters over to a context.
+     *
+     * @param context    the context to copy parameters to
+     * @param force      the DrudeForce to copy the parameters from
+     */
+    void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
+private:
+    CudaContext& cu;
+    CudaArray* particleParams;
+    CudaArray* pairParams;
+};
+
+/**
+ * This kernel is invoked by DrudeLangevinIntegrator to take one time step
+ */
+class CudaIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
+public:
+    CudaIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
+            IntegrateDrudeLangevinStepKernel(name, platform), cu(cu), normalParticles(NULL), pairParticles(NULL) {
+    }
+    ~CudaIntegrateDrudeLangevinStepKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param integrator the DrudeLangevinIntegrator this kernel will be used for
+     * @param force      the DrudeForce to get particle parameters from
+     */
+    void initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force);
+    /**
+     * Execute the kernel.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param integrator     the DrudeLangevinIntegrator this kernel is being used for
+     */
+    void execute(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
+    /**
+     * Compute the kinetic energy.
+     * 
+     * @param context     the context in which to execute this kernel
+     * @param integrator  the DrudeLangevinIntegrator this kernel is being used for
+     */
+    double computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
+private:
+    CudaContext& cu;
+    double prevStepSize;
+    CudaArray* normalParticles;
+    CudaArray* pairParticles;
+    CUfunction kernel1, kernel2;
+};
+
+} // namespace OpenMM
+
+#endif /*CUDA_DRUDE_KERNELS_H_*/
--- a/plugins/drude/platforms/cuda/src/kernels/drudeLangevin.cu
+++ b/plugins/drude/platforms/cuda/src/kernels/drudeLangevin.cu
+/**
+ * Perform the first step of Langevin integration.
+ */
+
+extern "C" __global__ void integrateDrudeLangevinPart1(mixed4* __restrict__ velm, const long long* __restrict__ force, mixed4* __restrict__ posDelta,
+        const int* __restrict__ normalParticles, const int2* __restrict__ pairParticles, const mixed2* __restrict__ dt, mixed vscale, mixed fscale,
+        mixed noisescale, mixed vscaleDrude, mixed fscaleDrude, mixed noisescaleDrude, const float4* __restrict__ random, unsigned int randomIndex) {
+    mixed stepSize = dt[0].y;
+    
+    // Update normal particles.
+
+    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_NORMAL_PARTICLES; i += blockDim.x*gridDim.x) {
+        int index = normalParticles[i];
+        mixed4 velocity = velm[index];
+        if (velocity.w != 0) {
+            mixed sqrtInvMass = SQRT(velocity.w);
+            float4 rand = random[randomIndex+index];
+            velocity.x = vscale*velocity.x + fscale*velocity.w*force[index] + noisescale*sqrtInvMass*rand.x;
+            velocity.y = vscale*velocity.y + fscale*velocity.w*force[index+PADDED_NUM_ATOMS] + noisescale*sqrtInvMass*rand.y;
+            velocity.z = vscale*velocity.z + fscale*velocity.w*force[index+PADDED_NUM_ATOMS*2] + noisescale*sqrtInvMass*rand.z;
+            velm[index] = velocity;
+            posDelta[index] = make_mixed4(stepSize*velocity.x, stepSize*velocity.y, stepSize*velocity.z, 0);
+        }
+    }
+    
+    // Update Drude particle pairs.
+    
+    randomIndex += NUM_NORMAL_PARTICLES;
+    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_PAIRS; i += blockDim.x*gridDim.x) {
+        int2 particles = pairParticles[i];
+        mixed4 velocity1 = velm[particles.x];
+        mixed4 velocity2 = velm[particles.y];
+        mixed mass1 = RECIP(velocity1.w);
+        mixed mass2 = RECIP(velocity2.w);
+        mixed invTotalMass = RECIP(mass1+mass2);
+        mixed invReducedMass = (mass1+mass2)*velocity1.w*velocity2.w;
+        mixed mass1fract = invTotalMass*mass1;
+        mixed mass2fract = invTotalMass*mass2;
+        mixed sqrtInvTotalMass = SQRT(invTotalMass);
+        mixed sqrtInvReducedMass = SQRT(invReducedMass);
+        mixed4 cmVel = velocity1*mass1fract+velocity2*mass2fract;
+        mixed4 relVel = velocity2-velocity1;
+        mixed3 force1 = make_mixed3(force[particles.x], force[particles.x+PADDED_NUM_ATOMS], force[particles.x+PADDED_NUM_ATOMS*2]);
+        mixed3 force2 = make_mixed3(force[particles.y], force[particles.y+PADDED_NUM_ATOMS], force[particles.y+PADDED_NUM_ATOMS*2]);
+        mixed3 cmForce = force1+force2;
+        mixed3 relForce = force2*mass1fract - force1*mass2fract;
+        float4 rand1 = random[randomIndex+2*i];
+        float4 rand2 = random[randomIndex+2*i+1];
+        cmVel.x = vscale*cmVel.x + fscale*invTotalMass*cmForce.x + noisescale*sqrtInvTotalMass*rand1.x;
+        cmVel.y = vscale*cmVel.y + fscale*invTotalMass*cmForce.y + noisescale*sqrtInvTotalMass*rand1.y;
+        cmVel.z = vscale*cmVel.z + fscale*invTotalMass*cmForce.z + noisescale*sqrtInvTotalMass*rand1.z;
+        relVel.x = vscaleDrude*relVel.x + fscaleDrude*invReducedMass*relForce.x + noisescaleDrude*sqrtInvReducedMass*rand2.x;
+        relVel.y = vscaleDrude*relVel.y + fscaleDrude*invReducedMass*relForce.y + noisescaleDrude*sqrtInvReducedMass*rand2.y;
+        relVel.z = vscaleDrude*relVel.z + fscaleDrude*invReducedMass*relForce.z + noisescaleDrude*sqrtInvReducedMass*rand2.z;
+        velocity1.x = cmVel.x-relVel.x*mass2fract;
+        velocity1.y = cmVel.y-relVel.y*mass2fract;
+        velocity1.z = cmVel.z-relVel.z*mass2fract;
+        velocity2.x = cmVel.x+relVel.x*mass1fract;
+        velocity2.y = cmVel.y+relVel.y*mass1fract;
+        velocity2.z = cmVel.z+relVel.z*mass1fract;
+        velm[particles.x] = velocity1;
+        velm[particles.y] = velocity2;
+        posDelta[particles.x] = make_mixed4(stepSize*velocity1.x, stepSize*velocity1.y, stepSize*velocity1.z, 0);
+        posDelta[particles.y] = make_mixed4(stepSize*velocity2.x, stepSize*velocity2.y, stepSize*velocity2.z, 0);
+    }
+}
+
+/**
+ * Perform the second step of Langevin integration.
+ */
+
+extern "C" __global__ void integrateDrudeLangevinPart2(real4* __restrict__ posq, real4* __restrict__ posqCorrection, const mixed4* __restrict__ posDelta, mixed4* __restrict__ velm, const mixed2* __restrict__ dt) {
+    double invStepSize = 1.0/dt[0].y;
+    int index = blockIdx.x*blockDim.x+threadIdx.x;
+    while (index < NUM_ATOMS) {
+        mixed4 vel = velm[index];
+        if (vel.w != 0) {
+#ifdef USE_MIXED_PRECISION
+ 
+            real4 pos1 = posq[index];
+            real4 pos2 = posqCorrection[index];
+            mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
+#else
+            real4 pos = posq[index];
+#endif
+            mixed4 delta = posDelta[index];
+            pos.x += delta.x;
+            pos.y += delta.y;
+            pos.z += delta.z;
+            vel.x = (mixed) (invStepSize*delta.x);
+            vel.y = (mixed) (invStepSize*delta.y);
+            vel.z = (mixed) (invStepSize*delta.z);
+#ifdef USE_MIXED_PRECISION
+            posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
+            posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
+#else
+            posq[index] = pos;
+#endif
+            velm[index] = vel;
+        }
+        index += blockDim.x*gridDim.x;
+    }
+}
--- a/plugins/drude/platforms/cuda/src/kernels/drudePairForce.cu
+++ b/plugins/drude/platforms/cuda/src/kernels/drudePairForce.cu
+float2 drudeParams = PARAMS[index];
+real3 force1 = make_real3(0);
+real3 force2 = make_real3(0);
+real3 force3 = make_real3(0);
+real3 force4 = make_real3(0);
+
+// First pair.
+
+real3 delta = make_real3(pos1.x-pos3.x, pos1.y-pos3.y, pos1.z-pos3.z);
+real rInv = RSQRT(dot(delta, delta));
+real r = RECIP(rInv);
+real u = drudeParams.x*r;
+real screening = 1-(1+0.5f*u)*EXP(-u);
+real pairEnergy = drudeParams.y*screening*rInv;
+energy += pairEnergy;
+real3 f = delta*(pairEnergy*rInv*rInv);
+force1 += f;
+force3 -= f;
+
+// Second pair.
+
+delta = make_real3(pos1.x-pos4.x, pos1.y-pos4.y, pos1.z-pos4.z);
+rInv = RSQRT(dot(delta, delta));
+r = RECIP(rInv);
+u = drudeParams.x*r;
+screening = 1-(1+0.5f*u)*EXP(-u);
+pairEnergy = -drudeParams.y*screening*rInv;
+energy += pairEnergy;
+f = delta*(pairEnergy*rInv*rInv);
+force1 += f;
+force4 -= f;
+
+// Third pair.
+
+delta = make_real3(pos2.x-pos3.x, pos2.y-pos3.y, pos2.z-pos3.z);
+rInv = RSQRT(dot(delta, delta));
+r = RECIP(rInv);
+u = drudeParams.x*r;
+screening = 1-(1+0.5f*u)*EXP(-u);
+pairEnergy = -drudeParams.y*screening*rInv;
+energy += pairEnergy;
+f = delta*(pairEnergy*rInv*rInv);
+force2 += f;
+force3 -= f;
+
+// Fourth pair.
+
+delta = make_real3(pos2.x-pos4.x, pos2.y-pos4.y, pos2.z-pos4.z);
+rInv = RSQRT(dot(delta, delta));
+r = RECIP(rInv);
+u = drudeParams.x*r;
+screening = 1-(1+0.5f*u)*EXP(-u);
+pairEnergy = drudeParams.y*screening*rInv;
+energy += pairEnergy;
+f = delta*(pairEnergy*rInv*rInv);
+force2 += f;
+force4 -= f;
--- a/plugins/drude/platforms/cuda/src/kernels/drudeParticleForce.cu
+++ b/plugins/drude/platforms/cuda/src/kernels/drudeParticleForce.cu
+real3 delta = make_real3(pos1.x-pos2.x, pos1.y-pos2.y, pos1.z-pos2.z);
+real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
+float4 drudeParams = PARAMS[index];
+float k1 = drudeParams.x;
+float k2 = drudeParams.y;
+float k3 = drudeParams.z;
+
+// Compute the isotropic force.
+
+energy += 0.5f*k3*r2;
+real3 force1 = -delta*k3;
+real3 force2 = delta*k3;
+real3 force3 = make_real3(0);
+real3 force4 = make_real3(0);
+real3 force5 = make_real3(0);
+
+// Compute the first anisotropic force.
+
+if (k1 != 0) {
+    real3 dir = make_real3(pos2.x-pos3.x, pos2.y-pos3.y, pos2.z-pos3.z);
+    real invDist = RSQRT(dot(dir, dir));
+    dir *= invDist;
+    real rprime = dot(dir, delta);
+    energy += 0.5f*k1*rprime*rprime;
+    real3 f1 = dir*(k1*rprime); 
+    real3 f2 = (delta-dir*rprime)*(k1*rprime*invDist);
+    force1 -= f1;
+    force2 += f1-f2;
+    force3 += f2;
+}
+
+// Compute the second anisotropic force.
+
+if (k2 != 0) {
+    real3 dir = make_real3(pos3.x-pos4.x, pos3.y-pos4.y, pos3.z-pos4.z);
+    real invDist = RSQRT(dot(dir, dir));
+    dir *= invDist;
+    real rprime = dot(dir, delta);
+    energy += 0.5f*k2*rprime*rprime;
+    real3 f1 = dir*(k2*rprime);
+    real3 f2 = (delta-dir*rprime)*(k2*rprime*invDist);
+    force1 -= f1;
+    force2 += f1;
+    force4 -= f2;
+    force5 += f2;
+}
--- a/plugins/drude/platforms/cuda/tests/CMakeLists.txt
+++ b/plugins/drude/platforms/cuda/tests/CMakeLists.txt
+#
+# Testing
+#
+
+ENABLE_TESTING()
+
+INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIR})
+
+# Automatically create tests using files named "Test*.cpp"
+FILE(GLOB TEST_PROGS "*Test*.cpp")
+FOREACH(TEST_PROG ${TEST_PROGS})
+    GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)
+
+    # Link with shared library
+    ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
+    TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_DRUDE_TARGET} ${SHARED_TARGET})
+    ADD_TEST(${TEST_ROOT}Single ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)
+    IF (OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS)
+        ADD_TEST(${TEST_ROOT}Mixed ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} mixed)
+        ADD_TEST(${TEST_ROOT}Double ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} double)
+    ENDIF(OPENMM_BUILD_CUDA_DOUBLE_PRECISION_TESTS)
+
+ENDFOREACH(TEST_PROG ${TEST_PROGS})
--- a/plugins/drude/platforms/cuda/tests/TestCudaDrudeForce.cpp
+++ b/plugins/drude/platforms/cuda/tests/TestCudaDrudeForce.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+/**
+ * This tests the CUDA implementation of DrudeForce.
+ */
+
+#include "openmm/internal/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/Platform.h"
+#include "openmm/System.h"
+#include "openmm/VerletIntegrator.h"
+#include "openmm/DrudeForce.h"
+#include "SimTKUtilities/SimTKOpenMMUtilities.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+extern "C" void registerDrudeCudaKernelFactories();
+
+void validateForce(System& system, vector<Vec3>& positions, double expectedEnergy) {
+    // Given a System containing a Drude force, check that its energy has the expected value.
+    
+    VerletIntegrator integ(1.0);
+    Platform& platform = Platform::getPlatformByName("CUDA");
+    Context context(system, integ, platform);
+    context.setPositions(positions);
+    State state = context.getState(State::Energy | State::Forces);
+    ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), 1e-5);
+    
+    // Try moving each particle along each axis, and see if the energy changes by the correct amount.
+    
+    double offset = 1e-3;
+    for (int i = 0; i < system.getNumParticles(); i++)
+        for (int j = 0; j < 3; j++) {
+            vector<Vec3> offsetPos = positions;
+            offsetPos[i][j] = positions[i][j]-offset;
+            context.setPositions(offsetPos);
+            double e1 = context.getState(State::Energy | State::Forces).getPotentialEnergy();
+            offsetPos[i][j] = positions[i][j]+offset;
+            context.setPositions(offsetPos);
+            double e2 = context.getState(State::Energy | State::Forces).getPotentialEnergy();
+            ASSERT_EQUAL_TOL(state.getForces()[i][j], (e1-e2)/(2*offset), 1e-3);
+        }
+}
+
+void testSingleParticle() {
+    const double k = 1.5;
+    const double charge = 0.1;
+    const double alpha = charge*charge/k;
+    System system;
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    DrudeForce* drude = new DrudeForce();
+    drude->addParticle(1, 0, -1, -1, -1, charge, alpha, 1, 1);
+    system.addForce(drude);
+    vector<Vec3> positions(2);
+    positions[0] = Vec3(-1, 0, 0);
+    positions[1] = Vec3(2, 0, 0);
+    validateForce(system, positions, 0.5*k*3*3);
+}
+
+void testAnisotropicParticle() {
+    const double k = 1.5;
+    const double charge = 0.1;
+    const double alpha = charge*charge/k;
+    const double a1 = 0.8;
+    const double a2 = 1.1;
+    const double k1 = k/a1;
+    const double k2 = k/a2;
+    const double k3 = k/(3-a1-a2);
+    System system;
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    DrudeForce* drude = new DrudeForce();
+    drude->addParticle(1, 0, 2, 3, 4, charge, alpha, a1, a2);
+    system.addForce(drude);
+    vector<Vec3> positions(5);
+    positions[0] = Vec3(0, 0, 0);
+    positions[1] = Vec3(0.1, -0.5, 0.8);
+    positions[2] = Vec3(0, 2, 0);
+    positions[3] = Vec3(1, 2, 0);
+    positions[4] = Vec3(1, 2, 3);
+    validateForce(system, positions, 0.5*k1*0.5*0.5 + 0.5*k2*0.8*0.8 + 0.5*k3*0.1*0.1);
+}
+
+double computeScreening(double r, double thole, double alpha1, double alpha2) {
+    double u = r*thole/pow(alpha1*alpha2, 1.0/6.0);
+    return 1.0-(1.0+u/2)*exp(-u);
+}
+
+void testThole() {
+    const double k = 1.5;
+    const double charge = 0.1;
+    const double alpha = charge*charge/k;
+    const double thole = 2.5;
+    System system;
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    system.addParticle(1.0);
+    DrudeForce* drude = new DrudeForce();
+    drude->addParticle(1, 0, -1, -1, -1, charge, alpha, 1, 1);
+    drude->addParticle(3, 2, -1, -1, -1, charge, alpha, 1, 1);
+    drude->addScreenedPair(0, 1, thole);
+    system.addForce(drude);
+    vector<Vec3> positions(4);
+    positions[0] = Vec3(0, 0, 0);
+    positions[1] = Vec3(0, -0.5, 0);
+    positions[2] = Vec3(1, 0, 0);
+    positions[3] = Vec3(1, 0, 0.3);
+    double energySpring1 = 0.5*k*0.5*0.5;
+    double energySpring2 = 0.5*k*0.3*0.3;
+    double energyDipole = 0.0;
+    double q[] = {-charge, charge, -charge, charge};
+    for (int i = 0; i < 2; i++)
+        for (int j = 2; j < 4; j++) {
+            Vec3 delta = positions[i]-positions[j];
+            double r = sqrt(delta.dot(delta));
+            energyDipole += ONE_4PI_EPS0*q[i]*q[j]*computeScreening(r, thole, alpha, alpha)/r;
+        }
+    validateForce(system, positions, energySpring1+energySpring2+energyDipole);
+}
+
+int main(int argc, char* argv[]) {
+    try {
+        registerDrudeCudaKernelFactories();
+        if (argc > 1)
+            Platform::getPlatformByName("CUDA").setPropertyDefaultValue("CudaPrecision", std::string(argv[1]));
+        testSingleParticle();
+        testAnisotropicParticle();
+        testThole();
+    }
+    catch(const std::exception& e) {
+        std::cout << "exception: " << e.what() << std::endl;
+        std::cout << "FAIL - ERROR.  Test failed." << std::endl;
+        return 1;
+    }
+    std::cout << "Done" << std::endl;
+    return 0;
+}
--- a/plugins/drude/platforms/cuda/tests/TestCudaDrudeLangevinIntegrator.cpp
+++ b/plugins/drude/platforms/cuda/tests/TestCudaDrudeLangevinIntegrator.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2013 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+/**
+ * This tests the CUDA implementation of DrudeLangevinIntegrator.
+ */
+
+#include "openmm/internal/AssertionUtilities.h"
+#include "openmm/Context.h"
+#include "openmm/NonbondedForce.h"
+#include "openmm/Platform.h"
+#include "openmm/System.h"
+#include "openmm/VirtualSite.h"
+#include "openmm/DrudeForce.h"
+#include "openmm/DrudeLangevinIntegrator.h"
+#include "SimTKUtilities/SimTKOpenMMUtilities.h"
+#include <iostream>
+#include <vector>
+
+using namespace OpenMM;
+using namespace std;
+
+extern "C" OPENMM_EXPORT void registerDrudeCudaKernelFactories();
+
+void testSinglePair() {
+    const double temperature = 300.0;
+    const double temperatureDrude = 10.0;
+    const double k = 1.5;
+    const double charge = 0.1;
+    const double alpha = charge*charge/k;
+    const double mass1 = 1.0;
+    const double mass2 = 0.1;
+    const double totalMass = mass1+mass2;
+    const double reducedMass = (mass1*mass2)/(mass1+mass2);
+    System system;
+    system.addParticle(mass1);
+    system.addParticle(mass2);
+    DrudeForce* drude = new DrudeForce();
+    drude->addParticle(1, 0, -1, -1, -1, charge, alpha, 1, 1);
+    system.addForce(drude);
+    vector<Vec3> positions(2);
+    positions[0] = Vec3(0, 0, 0);
+    positions[1] = Vec3(0, 0, 0);
+    DrudeLangevinIntegrator integ(temperature, 20.0, temperatureDrude, 20.0, 0.003);
+    Platform& platform = Platform::getPlatformByName("CUDA");
+    Context context(system, integ, platform);
+    context.setPositions(positions);
+    
+    // Equilibrate.
+    
+    integ.step(1000);
+    
+    // Compute the internal and center of mass temperatures.
+    
+    double keCM = 0, keInternal = 0;
+    int numSteps = 10000;
+    for (int i = 0; i < numSteps; i++) {
+        integ.step(10);
+        State state = context.getState(State::Velocities);
+        const vector<Vec3>& vel = state.getVelocities();
+        Vec3 velCM = vel[0]*(mass1/totalMass) + vel[1]*(mass2/totalMass);
+        keCM += 0.5*totalMass*velCM.dot(velCM);
+        Vec3 velInternal = vel[0]-vel[1];
+        keInternal += 0.5*reducedMass*velInternal.dot(velInternal);
+    }
+    ASSERT_USUALLY_EQUAL_TOL(3*0.5*BOLTZ*temperature, keCM/numSteps, 0.1);
+    ASSERT_USUALLY_EQUAL_TOL(3*0.5*BOLTZ*temperatureDrude, keInternal/numSteps, 0.01);
+}
+
+void testWater() {
+    // Create a box of SWM4-NDP water molecules.  This involves constraints, virtual sites,
+    // and Drude particles.
+    
+    const int gridSize = 4;
+    const int numMolecules = gridSize*gridSize*gridSize;
+    const double spacing = 0.6;
+    const double boxSize = spacing*(gridSize+1);
+    const double temperature = 300.0;
+    const double temperatureDrude = 10.0;
+    System system;
+    NonbondedForce* nonbonded = new NonbondedForce();
+    DrudeForce* drude = new DrudeForce();
+    system.addForce(nonbonded);
+    system.addForce(drude);
+    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
+    nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
+    nonbonded->setCutoffDistance(1.0);
+    for (int i = 0; i < numMolecules; i++) {
+        int startIndex = system.getNumParticles();
+        system.addParticle(15.6); // O
+        system.addParticle(0.4);  // D
+        system.addParticle(1.0);  // H1
+        system.addParticle(1.0);  // H2
+        system.addParticle(0.0);  // M
+        nonbonded->addParticle(1.71636, 0.318395, 0.21094*4.184);
+        nonbonded->addParticle(-1.71636, 1, 0);
+        nonbonded->addParticle(0.55733, 1, 0);
+        nonbonded->addParticle(0.55733, 1, 0);
+        nonbonded->addParticle(-1.11466, 1, 0);
+        for (int j = 0; j < 5; j++)
+            for (int k = 0; k < j; k++)
+                nonbonded->addException(startIndex+j, startIndex+k, 0, 1, 0);
+        system.addConstraint(startIndex, startIndex+2, 0.09572);
+        system.addConstraint(startIndex, startIndex+3, 0.09572);
+        system.addConstraint(startIndex+2, startIndex+3, 0.15139);
+        system.setVirtualSite(startIndex+4, new ThreeParticleAverageSite(startIndex, startIndex+2, startIndex+3, 0.786646558, 0.106676721, 0.106676721));
+        drude->addParticle(startIndex+1, startIndex, -1, -1, -1, -1.71636, 1.71636*1.71636/(100000*4.184), 1, 1);
+    }
+    vector<Vec3> positions;
+    for (int i = 0; i < gridSize; i++)
+        for (int j = 0; j < gridSize; j++)
+            for (int k = 0; k < gridSize; k++) {
+                Vec3 pos(i*spacing, j*spacing, k*spacing);
+                positions.push_back(pos);
+                positions.push_back(pos);
+                positions.push_back(pos+Vec3(0.09572, 0, 0));
+                positions.push_back(pos+Vec3(-0.023999, 0.092663, 0));
+                positions.push_back(pos);
+            }
+    
+    // Simulate it and check the temperature.
+    
+    DrudeLangevinIntegrator integ(temperature, 50.0, temperatureDrude, 50.0, 0.0005);
+    Platform& platform = Platform::getPlatformByName("CUDA");
+    Context context(system, integ, platform);
+    context.setPositions(positions);
+    context.applyConstraints(1e-5);
+    
+    // Equilibrate.
+    
+    integ.step(1000);
+    
+    // Compute the internal and center of mass temperatures.
+    
+    double ke = 0;
+    int numSteps = 10000;
+    for (int i = 0; i < numSteps; i++) {
+        integ.step(1);
+        ke += context.getState(State::Energy).getKineticEnergy();
+    }
+    ke /= numSteps;
+    int numStandardDof = 3*3*numMolecules-system.getNumConstraints();
+    int numDrudeDof = 3*numMolecules;
+    int numDof = numStandardDof+numDrudeDof;
+    double expectedTemp = (numStandardDof*temperature+numDrudeDof*temperatureDrude)/numDof;
+    ASSERT_USUALLY_EQUAL_TOL(expectedTemp, ke/(0.5*numDof*BOLTZ), 0.02);
+}
+
+int main(int argc, char* argv[]) {
+    try {
+        registerDrudeCudaKernelFactories();
+        if (argc > 1)
+            Platform::getPlatformByName("CUDA").setPropertyDefaultValue("CudaPrecision", string(argv[1]));
+        testSinglePair();
+        testWater();
+    }
+    catch(const std::exception& e) {
+        std::cout << "exception: " << e.what() << std::endl;
+        std::cout << "FAIL - ERROR.  Test failed." << std::endl;
+        return 1;
+    }
+    std::cout << "Done" << std::endl;
+    return 0;
+}
--- a/plugins/drude/platforms/reference/src/ReferenceDrudeKernels.cpp
+++ b/plugins/drude/platforms/reference/src/ReferenceDrudeKernels.cpp
@@ -209,7 +209,6 @@ ReferenceIntegrateDrudeLangevinStepKernel::~ReferenceIntegrateDrudeLangevinStepK
 }

 void ReferenceIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
-    drudeForce = &force;
    SimTKOpenMMUtilities::setRandomNumberSeed((unsigned int) integrator.getRandomNumberSeed());
    
    // Identify particle pairs and ordinary particles.

--- a/plugins/drude/platforms/reference/src/ReferenceDrudeKernels.h
+++ b/plugins/drude/platforms/reference/src/ReferenceDrudeKernels.h
@@ -106,13 +106,12 @@ public:
    /**
     * Compute the kinetic energy.
     * 
-     * @param context    the context in which to execute this kernel
-     * @param integrator     the DrudeLangevinIntegrator this kernel is being used for
+     * @param context     the context in which to execute this kernel
+     * @param integrator  the DrudeLangevinIntegrator this kernel is being used for
     */
    double computeKineticEnergy(ContextImpl& context, const DrudeLangevinIntegrator& integrator);
 private:
    ReferencePlatform::PlatformData& data;
-    const DrudeForce* drudeForce;
    std::vector<int> normalParticles;
    std::vector<std::pair<int, int> > pairParticles;
    std::vector<double> particleInvMass;

--- a/plugins/drude/platforms/reference/tests/TestReferenceDrudeForce.cpp
+++ b/plugins/drude/platforms/reference/tests/TestReferenceDrudeForce.cpp
@@ -41,7 +41,6 @@
 #include "openmm/VerletIntegrator.h"
 #include "openmm/DrudeForce.h"
 #include "SimTKUtilities/SimTKOpenMMUtilities.h"
-#include "sfmt/SFMT.h"
 #include <iostream>
 #include <vector>


--- a/plugins/drude/platforms/reference/tests/TestReferenceDrudeLangevinIntegrator.cpp
+++ b/plugins/drude/platforms/reference/tests/TestReferenceDrudeLangevinIntegrator.cpp
@@ -42,7 +42,6 @@
 #include "openmm/DrudeForce.h"
 #include "openmm/DrudeLangevinIntegrator.h"
 #include "SimTKUtilities/SimTKOpenMMUtilities.h"
-#include "sfmt/SFMT.h"
 #include <iostream>
 #include <vector>

@@ -160,7 +159,7 @@ void testWater() {
    // Compute the internal and center of mass temperatures.
    
    double ke = 0;
-    int numSteps = 2000;
+    int numSteps = 4000;
    for (int i = 0; i < numSteps; i++) {
        integ.step(1);
        ke += context.getState(State::Energy).getKineticEnergy();