Created OpenCL implementation of RPMD

a713aabf · Peter Eastman · f355f568 · a713aabf · a713aabf · a713aabf
Commit a713aabf authored Aug 13, 2011 by Peter Eastman
13 changed files
--- a/platforms/opencl/CMakeLists.txt
+++ b/platforms/opencl/CMakeLists.txt
@@ -135,8 +135,9 @@ INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
 # Set variables needed for encoding kernel sources into a C++ class

 SET(CL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
-SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/OpenCLKernelSources.cpp)
-SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/OpenCLKernelSources.h)
+SET(CL_SOURCE_CLASS OpenCLKernelSources)
+SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.cpp)
+SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.h)
 SET(SOURCE_FILES ${SOURCE_FILES} ${CL_KERNELS_CPP} ${CL_KERNELS_H})
 INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)


--- a/platforms/opencl/EncodeCLFiles.cmake
+++ b/platforms/opencl/EncodeCLFiles.cmake
 FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
 SET(CL_FILE_DECLARATIONS)
 SET(CL_FILE_DEFINITIONS)
-CONFIGURE_FILE(${CL_SOURCE_DIR}/OpenCLKernelSources.cpp.in ${CL_KERNELS_CPP})
+CONFIGURE_FILE(${CL_SOURCE_DIR}/${CL_SOURCE_CLASS}.cpp.in ${CL_KERNELS_CPP})
 FOREACH(file ${OPENCL_KERNELS})
    # Load the file contents and process it.
    FILE(STRINGS ${file} file_content NEWLINE_CONSUME)
@@ -17,6 +17,6 @@ FOREACH(file ${OPENCL_KERNELS})

    # Record the variable declaration and definition.
    SET(CL_FILE_DECLARATIONS ${CL_FILE_DECLARATIONS}static\ const\ std::string\ ${variable_name};\n)
-    FILE(APPEND ${CL_KERNELS_CPP} const\ string\ OpenCLKernelSources::${variable_name}\ =\ \"${file_content}\"\;\n)
+    FILE(APPEND ${CL_KERNELS_CPP} const\ string\ ${CL_SOURCE_CLASS}::${variable_name}\ =\ \"${file_content}\"\;\n)
 ENDFOREACH(file)
-CONFIGURE_FILE(${CL_SOURCE_DIR}/OpenCLKernelSources.h.in ${CL_KERNELS_H})
+CONFIGURE_FILE(${CL_SOURCE_DIR}/${CL_SOURCE_CLASS}.h.in ${CL_KERNELS_H})
--- a/platforms/opencl/sharedTarget/CMakeLists.txt
+++ b/platforms/opencl/sharedTarget/CMakeLists.txt
@@ -7,7 +7,7 @@ INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
 FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
 ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H}
    COMMAND ${CMAKE_COMMAND}
-    ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -P ${CMAKE_CURRENT_SOURCE_DIR}/../EncodeCLFiles.cmake
+    ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_CURRENT_SOURCE_DIR}/../EncodeCLFiles.cmake
    DEPENDS ${OPENCL_KERNELS}
 )
 SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE)

--- a/plugins/rpmd/CMakeLists.txt
+++ b/plugins/rpmd/CMakeLists.txt
@@ -204,4 +204,4 @@ ENDIF (EXECUTABLE_OUTPUT_PATH)
 #INCLUDE(ApiDoxygen.cmake)

 #ADD_SUBDIRECTORY(tests)
-#ADD_SUBDIRECTORY(examples)
+ADD_SUBDIRECTORY(platforms/opencl)
--- a/plugins/rpmd/platforms/opencl/CMakeLists.txt
+++ b/plugins/rpmd/platforms/opencl/CMakeLists.txt
+#---------------------------------------------------
+# OpenMM OpenCL RPMD Integrator
+#
+# Creates OpenMM library, base name=OpenMMRPMDOpenCL.
+# Default libraries are shared & optimized. Variants
+# are created for debug (_d).
+#
+# Windows:
+#   OpenMMRPMDOpenCL[_d].dll
+#   OpenMMRPMDOpenCL[_d].lib
+# Unix:
+#   libOpenMMRPMDOpenCL[_d].so
+#----------------------------------------------------
+
+IF (APPLE)
+    SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.6")
+    SET (CMAKE_OSX_SYSROOT "/Developer/SDKs/MacOSX10.6.sdk")
+ENDIF (APPLE)
+
+# The source is organized into subdirectories, but we handle them all from
+# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
+SET(OPENMM_SOURCE_SUBDIRS .)
+
+
+# Collect up information about the version of the OpenMM library we're building
+# and make it available to the code so it can be built into the binaries.
+
+SET(OPENMMRPMDOPENCL_LIBRARY_NAME OpenMMRPMDOpenCL)
+
+SET(SHARED_TARGET ${OPENMMRPMDOPENCL_LIBRARY_NAME})
+
+
+# Ensure that debug libraries have "_d" appended to their names.
+# CMake gets this right on Windows automatically with this definition.
+IF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
+    SET(CMAKE_DEBUG_POSTFIX "_d" CACHE INTERNAL "" FORCE)
+ENDIF (${CMAKE_GENERATOR} MATCHES "Visual Studio")
+
+# But on Unix or Cygwin we have to add the suffix manually
+IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+    SET(SHARED_TARGET ${SHARED_TARGET}_d)
+ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+
+
+# These are all the places to search for header files which are
+# to be part of the API.
+SET(API_INCLUDE_DIRS) # start empty
+FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
+    # append
+    SET(API_INCLUDE_DIRS ${API_INCLUDE_DIRS}
+                         ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include
+                         ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include/internal)
+ENDFOREACH(subdir)
+
+# We'll need both *relative* path names, starting with their API_INCLUDE_DIRS,
+# and absolute pathnames.
+SET(API_REL_INCLUDE_FILES)   # start these out empty
+SET(API_ABS_INCLUDE_FILES)
+
+FOREACH(dir ${API_INCLUDE_DIRS})
+    FILE(GLOB fullpaths ${dir}/*.h)	# returns full pathnames
+    SET(API_ABS_INCLUDE_FILES ${API_ABS_INCLUDE_FILES} ${fullpaths})
+
+    FOREACH(pathname ${fullpaths})
+        GET_FILENAME_COMPONENT(filename ${pathname} NAME)
+        SET(API_REL_INCLUDE_FILES ${API_REL_INCLUDE_FILES} ${dir}/${filename})
+    ENDFOREACH(pathname)
+ENDFOREACH(dir)
+
+# collect up source files
+SET(SOURCE_FILES) # empty
+SET(SOURCE_INCLUDE_FILES)
+
+FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
+    FILE(GLOB_RECURSE src_files  ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.c)
+    FILE(GLOB incl_files ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/src/*.h)
+    SET(SOURCE_FILES         ${SOURCE_FILES}         ${src_files})   #append
+    SET(SOURCE_INCLUDE_FILES ${SOURCE_INCLUDE_FILES} ${incl_files})
+    INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/${subdir}/include)
+ENDFOREACH(subdir)
+
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/src)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/include)
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_SOURCE_DIR}/platforms/opencl/src)
+
+# Set variables needed for encoding kernel sources into a C++ class
+
+SET(CL_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
+SET(CL_SOURCE_CLASS OpenCLRpmdKernelSources)
+SET(CL_KERNELS_CPP ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.cpp)
+SET(CL_KERNELS_H ${CMAKE_CURRENT_BINARY_DIR}/src/${CL_SOURCE_CLASS}.h)
+SET(SOURCE_FILES ${SOURCE_FILES} ${CL_KERNELS_CPP} ${CL_KERNELS_H})
+INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/src)
+
+# Create the library
+
+INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
+
+FILE(GLOB OPENCL_KERNELS ${CL_SOURCE_DIR}/kernels/*.cl)
+ADD_CUSTOM_COMMAND(OUTPUT ${CL_KERNELS_CPP} ${CL_KERNELS_H}
+    COMMAND ${CMAKE_COMMAND}
+    ARGS -D CL_SOURCE_DIR=${CL_SOURCE_DIR} -D CL_KERNELS_CPP=${CL_KERNELS_CPP} -D CL_KERNELS_H=${CL_KERNELS_H} -D CL_SOURCE_CLASS=${CL_SOURCE_CLASS} -P ${CMAKE_SOURCE_DIR}/platforms/opencl/EncodeCLFiles.cmake
+    DEPENDS ${OPENCL_KERNELS}
+)
+SET_SOURCE_FILES_PROPERTIES(${CL_KERNELS_CPP} ${CL_KERNELS_H} PROPERTIES GENERATED TRUE)
+ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
+
+IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+    SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}_d)
+ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+    SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
+ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
+TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB}  ${OPENCL_LIBRARIES} ${PTHREADS_LIB})
+TARGET_LINK_LIBRARIES(${SHARED_TARGET} debug ${OPENMM_LIBRARY_NAME}OpenCL_d optimized ${OPENMM_LIBRARY_NAME}OpenCL)
+SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_BUILDING_SHARED_LIBRARY")
+
+INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
+
+SUBDIRS (tests)
--- a/plugins/rpmd/platforms/opencl/include/OpenCLRpmdKernelFactory.h
+++ b/plugins/rpmd/platforms/opencl/include/OpenCLRpmdKernelFactory.h
+#ifndef OPENMM_OPENCLRPMDKERNELFACTORY_H_
+#define OPENMM_OPENCLRPMDKERNELFACTORY_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2011 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/KernelFactory.h"
+
+namespace OpenMM {
+
+/**
+ * This KernelFactory creates kernels for the OpenCL implementation of RPMDIntegrator.
+ */
+
+class OpenCLRpmdKernelFactory : public KernelFactory {
+public:
+    KernelImpl* createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_OPENCLRPMDKERNELFACTORY_H_*/
--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernelFactory.cpp
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernelFactory.cpp
+/* -------------------------------------------------------------------------- *
+ *                              OpenMMAmoeba                                  *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2011 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "OpenCLRpmdKernelFactory.h"
+#include "OpenCLRpmdKernels.h"
+#include "openmm/internal/windowsExport.h"
+#include "openmm/internal/ContextImpl.h"
+#include "openmm/OpenMMException.h"
+
+using namespace OpenMM;
+
+extern "C" void registerPlatforms() {
+}
+
+extern "C" void registerKernelFactories() {
+    Platform& platform = Platform::getPlatformByName("OpenCL");
+    OpenCLRpmdKernelFactory* factory = new OpenCLRpmdKernelFactory();
+    platform.registerKernelFactory(IntegrateRPMDStepKernel::Name(), factory);
+}
+
+KernelImpl* OpenCLRpmdKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
+    OpenCLContext& cl = *static_cast<OpenCLPlatform::PlatformData*>(context.getPlatformData())->contexts[0];
+    if (name == IntegrateRPMDStepKernel::Name())
+        return new OpenCLIntegrateRPMDStepKernel(name, platform, cl);
+    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
+}
--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernelSources.cpp.in
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernelSources.cpp.in
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "OpenCLRpmdKernelSources.h"
+
+using namespace OpenMM;
+using namespace std;
+
--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernelSources.h.in
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernelSources.h.in
+#ifndef OPENMM_OPENCLRPMDKERNELSOURCES_H_
+#define OPENMM_OPENCLRPMDKERNELSOURCES_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include <string>
+
+namespace OpenMM {
+
+/**
+ * This class is a central holding place for the source code of OpenCL kernels.
+ * The CMake build script inserts declarations into it based on the .cl files in the
+ * kernels subfolder.
+ */
+
+class OpenCLRpmdKernelSources {
+public:
+@CL_FILE_DECLARATIONS@
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_OPENCLRPMDKERNELSOURCES_H_*/
--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2011 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "OpenCLRpmdKernels.h"
+#include "OpenCLRpmdKernelSources.h"
+#include "openmm/internal/ContextImpl.h"
+#include "OpenCLIntegrationUtilities.h"
+#include "OpenCLExpressionUtilities.h"
+#include "OpenCLFFT3D.h"
+#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
+
+using namespace OpenMM;
+using namespace std;
+
+OpenCLIntegrateRPMDStepKernel::~OpenCLIntegrateRPMDStepKernel() {
+    if (forces != NULL)
+        delete forces;
+    if (positions != NULL)
+        delete positions;
+    if (velocities != NULL)
+        delete velocities;
+}
+void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
+    cl.getPlatformData().initializeContexts(system);
+    numCopies = integrator.getNumCopies();
+    numParticles = system.getNumParticles();
+    workgroupSize = numCopies;
+    while (workgroupSize < 128-numCopies)
+        workgroupSize += numCopies;
+    if (numCopies != OpenCLFFT3D::findLegalDimension(numCopies))
+        throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
+    int paddedParticles = cl.getPaddedNumAtoms();
+    forces = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdForces");
+    positions = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdPositions");
+    velocities = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdVelocities");
+    cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
+    
+    // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
+    
+    vector<mm_float4> temp(positions->getSize());
+    for (int i = 0; i < positions->getSize(); i++)
+        temp[i] = mm_float4(0, 0, 0, 0);
+    for (int i = 0; i < velocities->getSize(); i++)
+        temp[i] = mm_float4(0, 0, 0, 1);
+    velocities->upload(temp);
+
+    // Create kernels.
+    
+    map<string, string> defines;
+    defines["NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(cl.getNumAtoms());
+    defines["PADDED_NUM_ATOMS"] = OpenCLExpressionUtilities::intToString(cl.getPaddedNumAtoms());
+    defines["NUM_COPIES"] = OpenCLExpressionUtilities::intToString(numCopies);
+    defines["HBAR"] = OpenCLExpressionUtilities::doubleToString(1.054571628e-34*AVOGADRO/(1000*1e-12));
+    defines["SCALE"] = OpenCLExpressionUtilities::doubleToString(1.0/sqrt((double) numCopies));
+    defines["M_PI"] = OpenCLExpressionUtilities::doubleToString(M_PI);
+    map<string, string> replacements;
+    replacements["FFT_Q_FORWARD"] = createFFT(numCopies, "q", true);
+    replacements["FFT_Q_BACKWARD"] = createFFT(numCopies, "q", false);
+    replacements["FFT_V_FORWARD"] = createFFT(numCopies, "v", true);
+    replacements["FFT_V_BACKWARD"] = createFFT(numCopies, "v", false);
+    cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLRpmdKernelSources::rpmd, replacements), defines, "");
+    pileKernel = cl::Kernel(program, "applyPileThermostat");
+    stepKernel = cl::Kernel(program, "integrateStep");
+    velocitiesKernel = cl::Kernel(program, "advanceVelocities");
+}
+
+void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid) {
+    const System& system = context.getSystem();
+    const int paddedParticles = cl.getPaddedNumAtoms();
+    OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
+
+    if (!hasInitializedKernel) {
+        hasInitializedKernel = true;
+        pileKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
+        pileKernel.setArg(1, 2*workgroupSize*sizeof(mm_float4), NULL);
+        pileKernel.setArg(2, 2*workgroupSize*sizeof(mm_float4), NULL);
+        pileKernel.setArg(3, numCopies*sizeof(mm_float2), NULL);
+        pileKernel.setArg<cl::Buffer>(4, integration.getRandom().getDeviceBuffer());
+        stepKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
+        stepKernel.setArg<cl::Buffer>(1, velocities->getDeviceBuffer());
+        stepKernel.setArg<cl::Buffer>(2, forces->getDeviceBuffer());
+        stepKernel.setArg(3, 2*workgroupSize*sizeof(mm_float4), NULL);
+        stepKernel.setArg(4, 2*workgroupSize*sizeof(mm_float4), NULL);
+        stepKernel.setArg(5, 2*workgroupSize*sizeof(mm_float4), NULL);
+        stepKernel.setArg(6, numCopies*sizeof(mm_float2), NULL);
+        velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
+        velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
+    }
+    
+    // Loop over copies and compute the force on each one.
+    
+    if (!forcesAreValid) {
+        for (int i = 0; i < numCopies; i++) {
+            cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
+                    i*paddedParticles, 0, paddedParticles*sizeof(mm_float4));
+            context.calcForcesAndEnergy(true, false);
+            cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), forces->getDeviceBuffer(),
+                    0, i*paddedParticles, paddedParticles*sizeof(mm_float4));
+        }
+    }
+    
+    // Apply the PILE-L thermostat.
+    
+    const double dt = integrator.getStepSize();
+    pileKernel.setArg<cl_uint>(5, integration.prepareRandomNumbers(cl.getPaddedNumAtoms()));
+    pileKernel.setArg<cl_float>(6, dt);
+    pileKernel.setArg<cl_float>(7, integrator.getTemperature()*BOLTZ);
+    pileKernel.setArg<cl_float>(8, integrator.getFriction());
+    cl.executeKernel(pileKernel, numParticles*numCopies, workgroupSize);
+
+    // Update positions and velocities.
+    
+    stepKernel.setArg<cl_float>(7, dt);
+    stepKernel.setArg<cl_float>(8, integrator.getTemperature()*BOLTZ);
+    cl.executeKernel(stepKernel, numParticles*numCopies, workgroupSize);
+
+    // Calculate forces based on the updated positions.
+    
+    for (int i = 0; i < numCopies; i++) {
+        cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
+                i*paddedParticles, 0, paddedParticles*sizeof(mm_float4));
+        context.calcForcesAndEnergy(true, false);
+        cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), forces->getDeviceBuffer(),
+                0, i*paddedParticles, paddedParticles*sizeof(mm_float4));
+    }
+    
+    // Update velocities.
+    velocitiesKernel.setArg<cl_float>(2, dt);
+    cl.executeKernel(velocitiesKernel, numParticles*numCopies, workgroupSize);
+
+    // Apply the PILE-L thermostat again.
+
+    cl.executeKernel(pileKernel, numParticles*numCopies, workgroupSize);
+
+    // Update the time and step count.
+
+    cl.setTime(cl.getTime()+dt);
+    cl.setStepCount(cl.getStepCount()+1);
+}
+
+void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
+    if (positions == NULL)
+        throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
+    if (pos.size() != numParticles)
+        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
+    vector<mm_float4> posq(numParticles);
+    for (int i = 0; i < numParticles; i++)
+        posq[i] = mm_float4(pos[i][0], pos[i][1], pos[i][2], cl.getPosq()[i].w);
+    cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
+}
+
+void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
+    if (velocities == NULL)
+        throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
+    if (vel.size() != numParticles)
+        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
+    vector<mm_float4> velm(numParticles);
+    for (int i = 0; i < numParticles; i++)
+        velm[i] = mm_float4(vel[i][0], vel[i][1], vel[i][2], cl.getVelm()[i].w);
+    cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
+}
+
+void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) const {
+    int paddedParticles = cl.getPaddedNumAtoms();
+    cl.getQueue().enqueueCopyBuffer(positions->getDeviceBuffer(), cl.getPosq().getDeviceBuffer(),
+            copy*paddedParticles*sizeof(mm_float4), 0, paddedParticles*sizeof(mm_float4));
+    cl.getQueue().enqueueCopyBuffer(velocities->getDeviceBuffer(), cl.getVelm().getDeviceBuffer(),
+            copy*paddedParticles*sizeof(mm_float4), 0, paddedParticles*sizeof(mm_float4));
+}
+
+string OpenCLIntegrateRPMDStepKernel::createFFT(int size, const string& variable, bool forward) {
+    stringstream source;
+    int unfactored = size;
+    int stage = 0;
+    int L = size;
+    int m = 1;
+    string sign = (forward ? "1.0f" : "-1.0f");
+
+    source<<"{\n";
+    source<<"__local float4* real0 = "<<variable<<"real;\n";
+    source<<"__local float4* imag0 = "<<variable<<"imag;\n";
+    source<<"__local float4* real1 = &temp[blockStart];\n";
+    source<<"__local float4* imag1 = &temp[blockStart+get_local_size(0)];\n";
+
+    // Factor size, generating an appropriate block of code for each factor.
+
+    while (unfactored > 1) {
+        int input = stage%2;
+        int output = 1-input;
+        source<<"{\n";
+        if (unfactored%5 == 0) {
+            L = L/5;
+            source<<"// Pass "<<(stage+1)<<" (radix 5)\n";
+            source<<"if (indexInBlock < "<<(L*m)<<") {\n";
+            source<<"int i = indexInBlock;\n";
+            source<<"int j = i/"<<m<<";\n";
+            source<<"float4 c0r = real"<<input<<"[i];\n";
+            source<<"float4 c0i = imag"<<input<<"[i];\n";
+            source<<"float4 c1r = real"<<input<<"[i+"<<(L*m)<<"];\n";
+            source<<"float4 c1i = imag"<<input<<"[i+"<<(L*m)<<"];\n";
+            source<<"float4 c2r = real"<<input<<"[i+"<<(2*L*m)<<"];\n";
+            source<<"float4 c2i = imag"<<input<<"[i+"<<(2*L*m)<<"];\n";
+            source<<"float4 c3r = real"<<input<<"[i+"<<(3*L*m)<<"];\n";
+            source<<"float4 c3i = imag"<<input<<"[i+"<<(3*L*m)<<"];\n";
+            source<<"float4 c4r = real"<<input<<"[i+"<<(4*L*m)<<"];\n";
+            source<<"float4 c4i = imag"<<input<<"[i+"<<(4*L*m)<<"];\n";
+            source<<"float4 d0r = c1r+c4r;\n";
+            source<<"float4 d0i = c1i+c4i;\n";
+            source<<"float4 d1r = c2r+c3r;\n";
+            source<<"float4 d1i = c2i+c3i;\n";
+            source<<"float4 d2r = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c1r-c4r);\n";
+            source<<"float4 d2i = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c1i-c4i);\n";
+            source<<"float4 d3r = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c2r-c3r);\n";
+            source<<"float4 d3i = "<<OpenCLExpressionUtilities::doubleToString(sin(0.4*M_PI))<<"*(c2i-c3i);\n";
+            source<<"float4 d4r = d0r+d1r;\n";
+            source<<"float4 d4i = d0i+d1i;\n";
+            source<<"float4 d5r = "<<OpenCLExpressionUtilities::doubleToString(0.25*sqrt(5.0))<<"*(d0r-d1r);\n";
+            source<<"float4 d5i = "<<OpenCLExpressionUtilities::doubleToString(0.25*sqrt(5.0))<<"*(d0i-d1i);\n";
+            source<<"float4 d6r = c0r-0.25f*d4r;\n";
+            source<<"float4 d6i = c0i-0.25f*d4i;\n";
+            source<<"float4 d7r = d6r+d5r;\n";
+            source<<"float4 d7i = d6i+d5i;\n";
+            source<<"float4 d8r = d6r-d5r;\n";
+            source<<"float4 d8i = d6i-d5i;\n";
+            string coeff = OpenCLExpressionUtilities::doubleToString(sin(0.2*M_PI)/sin(0.4*M_PI));
+            source<<"float4 d9r = "<<sign<<"*(d2i+"<<coeff<<"*d3i);\n";
+            source<<"float4 d9i = "<<sign<<"*(-d2r-"<<coeff<<"*d3r);\n";
+            source<<"float4 d10r = "<<sign<<"*("<<coeff<<"*d2i-d3i);\n";
+            source<<"float4 d10i = "<<sign<<"*(d3r-"<<coeff<<"*d2r);\n";
+            source<<"real"<<output<<"[i+4*j*"<<m<<"] = c0r+d4r;\n";
+            source<<"imag"<<output<<"[i+4*j*"<<m<<"] = c0i+d4i;\n";
+            source<<"real"<<output<<"[i+(4*j+1)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<size<<"/"<<(5*L)<<"], d7r+d9r, d7i+d9i);\n";
+            source<<"imag"<<output<<"[i+(4*j+1)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<size<<"/"<<(5*L)<<"], d7r+d9r, d7i+d9i);\n";
+            source<<"real"<<output<<"[i+(4*j+2)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(2*size)<<"/"<<(5*L)<<"], d8r+d10r, d8i+d10i);\n";
+            source<<"imag"<<output<<"[i+(4*j+2)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(2*size)<<"/"<<(5*L)<<"], d8r+d10r, d8i+d10i);\n";
+            source<<"real"<<output<<"[i+(4*j+3)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(3*size)<<"/"<<(5*L)<<"], d8r-d10r, d8i-d10i);\n";
+            source<<"imag"<<output<<"[i+(4*j+3)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(3*size)<<"/"<<(5*L)<<"], d8r-d10r, d8i-d10i);\n";
+            source<<"real"<<output<<"[i+(4*j+4)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(4*size)<<"/"<<(5*L)<<"], d7r-d9r, d7i-d9i);\n";
+            source<<"imag"<<output<<"[i+(4*j+4)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(4*size)<<"/"<<(5*L)<<"], d7r-d9r, d7i-d9i);\n";
+            source<<"}\n";
+            m = m*5;
+            unfactored /= 5;
+        }
+        else if (unfactored%4 == 0) {
+            L = L/4;
+            source<<"// Pass "<<(stage+1)<<" (radix 4)\n";
+            source<<"if (indexInBlock < "<<(L*m)<<") {\n";
+            source<<"int i = indexInBlock;\n";
+            source<<"int j = i/"<<m<<";\n";
+            source<<"float4 c0r = real"<<input<<"[i];\n";
+            source<<"float4 c0i = imag"<<input<<"[i];\n";
+            source<<"float4 c1r = real"<<input<<"[i+"<<(L*m)<<"];\n";
+            source<<"float4 c1i = imag"<<input<<"[i+"<<(L*m)<<"];\n";
+            source<<"float4 c2r = real"<<input<<"[i+"<<(2*L*m)<<"];\n";
+            source<<"float4 c2i = imag"<<input<<"[i+"<<(2*L*m)<<"];\n";
+            source<<"float4 c3r = real"<<input<<"[i+"<<(3*L*m)<<"];\n";
+            source<<"float4 c3i = imag"<<input<<"[i+"<<(3*L*m)<<"];\n";
+            source<<"float4 d0r = c0r+c2r;\n";
+            source<<"float4 d0i = c0i+c2i;\n";
+            source<<"float4 d1r = c0r-c2r;\n";
+            source<<"float4 d1i = c0i-c2i;\n";
+            source<<"float4 d2r = c1r+c3r;\n";
+            source<<"float4 d2i = c1i+c3i;\n";
+            source<<"float4 d3r = "<<sign<<"*(c1i-c3i);\n";
+            source<<"float4 d3i = "<<sign<<"*(c3r-c1r);\n";
+            source<<"real"<<output<<"[i+3*j*"<<m<<"] = d0r+d2r;\n";
+            source<<"imag"<<output<<"[i+3*j*"<<m<<"] = d0i+d2i;\n";
+            source<<"real"<<output<<"[i+(3*j+1)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<size<<"/"<<(4*L)<<"], d1r+d3r, d1i+d3i);\n";
+            source<<"imag"<<output<<"[i+(3*j+1)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<size<<"/"<<(4*L)<<"], d1r+d3r, d1i+d3i);\n";
+            source<<"real"<<output<<"[i+(3*j+2)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(2*size)<<"/"<<(4*L)<<"], d0r-d2r, d0i-d2i);\n";
+            source<<"imag"<<output<<"[i+(3*j+2)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(2*size)<<"/"<<(4*L)<<"], d0r-d2r, d0i-d2i);\n";
+            source<<"real"<<output<<"[i+(3*j+3)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(3*size)<<"/"<<(4*L)<<"], d1r-d3r, d1i-d3i);\n";
+            source<<"imag"<<output<<"[i+(3*j+3)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(3*size)<<"/"<<(4*L)<<"], d1r-d3r, d1i-d3i);\n";
+            source<<"}\n";
+            m = m*4;
+            unfactored /= 4;
+        }
+        else if (unfactored%3 == 0) {
+            L = L/3;
+            source<<"// Pass "<<(stage+1)<<" (radix 3)\n";
+            source<<"if (indexInBlock < "<<(L*m)<<") {\n";
+            source<<"int i = indexInBlock;\n";
+            source<<"int j = i/"<<m<<";\n";
+            source<<"float4 c0r = real"<<input<<"[i];\n";
+            source<<"float4 c0i = imag"<<input<<"[i];\n";
+            source<<"float4 c1r = real"<<input<<"[i+"<<(L*m)<<"];\n";
+            source<<"float4 c1i = imag"<<input<<"[i+"<<(L*m)<<"];\n";
+            source<<"float4 c2r = real"<<input<<"[i+"<<(2*L*m)<<"];\n";
+            source<<"float4 c2i = imag"<<input<<"[i+"<<(2*L*m)<<"];\n";
+            source<<"float4 d0r = c1r+c2r;\n";
+            source<<"float4 d0i = c1i+c2i;\n";
+            source<<"float4 d1r = c0r-0.5f*d0r;\n";
+            source<<"float4 d1i = c0i-0.5f*d0i;\n";
+            source<<"float4 d2r = "<<sign<<"*"<<OpenCLExpressionUtilities::doubleToString(sin(M_PI/3.0))<<"*(c1i-c2i);\n";
+            source<<"float4 d2i = "<<sign<<"*"<<OpenCLExpressionUtilities::doubleToString(sin(M_PI/3.0))<<"*(c2r-c1r);\n";
+            source<<"real"<<output<<"[i+2*j*"<<m<<"] = c0r+d0r;\n";
+            source<<"imag"<<output<<"[i+2*j*"<<m<<"] = c0i+d0i;\n";
+            source<<"real"<<output<<"[i+(2*j+1)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<size<<"/"<<(3*L)<<"], d1r+d2r, d1i+d2i);\n";
+            source<<"imag"<<output<<"[i+(2*j+1)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<size<<"/"<<(3*L)<<"], d1r+d2r, d1i+d2i);\n";
+            source<<"real"<<output<<"[i+(2*j+2)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<(2*size)<<"/"<<(3*L)<<"], d1r-d2r, d1i-d2i);\n";
+            source<<"imag"<<output<<"[i+(2*j+2)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<(2*size)<<"/"<<(3*L)<<"], d1r-d2r, d1i-d2i);\n";
+            source<<"}\n";
+            m = m*3;
+            unfactored /= 3;
+        }
+        else if (unfactored%2 == 0) {
+            L = L/2;
+            source<<"// Pass "<<(stage+1)<<" (radix 2)\n";
+            source<<"if (indexInBlock < "<<(L*m)<<") {\n";
+            source<<"int i = indexInBlock;\n";
+            source<<"int j = i/"<<m<<";\n";
+            source<<"float4 c0r = real"<<input<<"[i];\n";
+            source<<"float4 c0i = imag"<<input<<"[i];\n";
+            source<<"float4 c1r = real"<<input<<"[i+"<<(L*m)<<"];\n";
+            source<<"float4 c1i = imag"<<input<<"[i+"<<(L*m)<<"];\n";
+            source<<"real"<<output<<"[i+j*"<<m<<"] = c0r+c1r;\n";
+            source<<"imag"<<output<<"[i+j*"<<m<<"] = c0i+c1i;\n";
+            source<<"real"<<output<<"[i+(j+1)*"<<m<<"] = multiplyComplexRealPart(w[j*"<<size<<"/"<<(2*L)<<"], c0r-c1r, c0i-c1i);\n";
+            source<<"imag"<<output<<"[i+(j+1)*"<<m<<"] = multiplyComplexImagPart(w[j*"<<size<<"/"<<(2*L)<<"], c0r-c1r, c0i-c1i);\n";
+            source<<"}\n";
+            m = m*2;
+            unfactored /= 2;
+        }
+        else
+            throw OpenMMException("Illegal size for FFT: "+OpenCLExpressionUtilities::intToString(size));
+        source<<"barrier(CLK_LOCAL_MEM_FENCE);\n";
+        source<<"}\n";
+        ++stage;
+    }
+
+    // Create the kernel.
+
+    if (stage%2 == 1) {
+        source<<variable<<"real[indexInBlock] = real1[indexInBlock];\n";
+        source<<variable<<"imag[indexInBlock] = imag1[indexInBlock];\n";
+    }
+    source<<"}\n";
+    return source.str();
+}
--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
+#ifndef OPENCL_RPMD_KERNELS_H_
+#define OPENCL_RPMD_KERNELS_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2011 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/RpmdKernels.h"
+#include "OpenCLContext.h"
+#include "OpenCLArray.h"
+
+namespace OpenMM {
+
+/**
+ * This kernel is invoked by RPMDIntegrator to take one time step, and to get and
+ * set the state of system copies.
+ */
+class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
+public:
+    OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
+            IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL) {
+    }
+    ~OpenCLIntegrateRPMDStepKernel();
+    /**
+     * Initialize the kernel.
+     *
+     * @param system     the System this kernel will be applied to
+     * @param integrator the RPMDIntegrator this kernel will be used for
+     */
+    void initialize(const System& system, const RPMDIntegrator& integrator);
+    /**
+     * Execute the kernel.
+     *
+     * @param context        the context in which to execute this kernel
+     * @param integrator     the RPMDIntegrator this kernel is being used for
+     * @param forcesAreValid if the context has been modified since the last time step, this will be
+     *                       false to show that cached forces are invalid and must be recalculated
+     */
+    void execute(ContextImpl& context, const RPMDIntegrator& integrator, bool forcesAreValid);
+    /**
+     * Get the positions of all particles in one copy of the system.
+     */
+    void setPositions(int copy, const std::vector<Vec3>& positions);
+    /**
+     * Get the velocities of all particles in one copy of the system.
+     */
+    void setVelocities(int copy, const std::vector<Vec3>& velocities);
+    /**
+     * Copy positions and velocities for one copy into the context.
+     */
+    void copyToContext(int copy, ContextImpl& context) const;
+private:
+    std::string createFFT(int size, const std::string& variable, bool forward);
+    OpenCLContext& cl;
+    bool hasInitializedKernel;
+    int numCopies, numParticles, workgroupSize;
+    OpenCLArray<mm_float4>* forces;
+    OpenCLArray<mm_float4>* positions;
+    OpenCLArray<mm_float4>* velocities;
+    cl::Kernel pileKernel, stepKernel, velocitiesKernel;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENCL_RPMD_KERNELS_H_*/
--- a/plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
+++ b/plugins/rpmd/platforms/opencl/src/kernels/rpmd.cl
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+float4 multiplyComplexRealPart(float2 c1, float4 c2r, float4 c2i) {
+    return c1.x*c2r-c1.y*c2i;
+}
+
+float4 multiplyComplexImagPart(float2 c1, float4 c2r, float4 c2i) {
+    return c1.x*c2i+c1.y*c2r;
+}
+
+/**
+ * Apply the PILE-L thermostat.
+ */
+__kernel void applyPileThermostat(__global float4* velm, __local float4* v, __local float4* temp, __local float2* w, __global float4* random, unsigned int randomIndex,
+        float dt, float kT, float friction) {
+    const int numBlocks = get_global_size(0)/NUM_COPIES;
+    const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
+    const int indexInBlock = get_local_id(0)-blockStart;
+    const float nkT = NUM_COPIES*kT;
+    const float twown = 2.0f*nkT/HBAR;
+    const float c1_0 = EXP(-0.5f*dt*friction);
+    const float c2_0 = SQRT(1.0f-c1_0*c1_0);
+    __local float4* vreal = &v[blockStart];
+    __local float4* vimag = &v[blockStart+get_local_size(0)];
+    if (get_local_id(0) < NUM_COPIES)
+        w[get_local_id(0)] = (float2) (cos(get_local_id(0)*2*M_PI/NUM_COPIES), sin(-get_local_id(0)*2*M_PI/NUM_COPIES));
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
+        float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
+        float invMass = particleVelm.w;
+        float c3_0 = c2_0*SQRT(nkT*invMass);
+        
+        // Forward FFT.
+        
+        vreal[indexInBlock] = SCALE*particleVelm;
+        vimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f);
+        barrier(CLK_GLOBAL_MEM_FENCE);
+        FFT_V_FORWARD
+
+        // Apply the thermostat.
+
+        if (indexInBlock == 0) {
+            // Apply a local Langevin thermostat to the centroid mode.
+
+            vreal[0].xyz = vreal[0].xyz*c1_0 + c3_0*random[randomIndex].xyz;
+        }
+        else {
+            // Use critical damping white noise for the remaining modes.
+
+            int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock);
+            const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2);
+            const float wk = twown*sin(k*M_PI/NUM_COPIES);
+            const float c1 = EXP(-wk*dt);
+            const float c2 = SQRT((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f);
+            const float c3 = c2*SQRT(nkT*invMass);
+            float4 rand1 = c3*random[randomIndex+k];
+            float4 rand2 = (isCenter ? 0.0f : c3*random[randomIndex+NUM_COPIES-k]);
+            vreal[indexInBlock].xyz = c1*vreal[indexInBlock].xyz + rand1.xyz;
+            vimag[indexInBlock].xyz = c1*vimag[indexInBlock].xyz + (indexInBlock < NUM_COPIES/2 ? rand2.xyz : -rand2.xyz);
+        }
+        barrier(CLK_GLOBAL_MEM_FENCE);
+        
+        // Inverse FFT.
+        
+        FFT_V_BACKWARD
+        velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz;
+        randomIndex += numBlocks*NUM_COPIES;
+    }
+}
+
+/**
+ * Advance the positions and velocities.
+ */
+__kernel void integrateStep(__global float4* posq, __global float4* velm, __global float4* force,
+        __local float4* q, __local float4* v, __local float4* temp, __local float2* w, float dt, float kT) {
+    const int numBlocks = get_global_size(0)/NUM_COPIES;
+    const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
+    const int indexInBlock = get_local_id(0)-blockStart;
+    const float nkT = NUM_COPIES*kT;
+    const float twown = 2.0f*nkT/HBAR;
+
+    // Update velocities.
+    
+    for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
+        int index = particle+indexInBlock*PADDED_NUM_ATOMS;
+        float4 particleVelm = velm[index];
+        particleVelm.xyz += force[index].xyz*(0.5f*dt*particleVelm.w);
+        velm[index] = particleVelm;
+    }
+    
+    // Evolve the free ring polymer by transforming to the frequency domain.
+
+    __local float4* qreal = &q[blockStart];
+    __local float4* qimag = &q[blockStart+get_local_size(0)];
+    __local float4* vreal = &v[blockStart];
+    __local float4* vimag = &v[blockStart+get_local_size(0)];
+    if (get_local_id(0) < NUM_COPIES)
+        w[get_local_id(0)] = (float2) (cos(get_local_id(0)*2*M_PI/NUM_COPIES), sin(-get_local_id(0)*2*M_PI/NUM_COPIES));
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
+        float4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
+        float4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
+        float invMass = particleVelm.w;
+        
+        // Forward FFT.
+        
+        qreal[indexInBlock] = SCALE*particlePosq;
+        qimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f);
+        vreal[indexInBlock] = SCALE*particleVelm;
+        vimag[indexInBlock] = (float4) (0.0f, 0.0f, 0.0f, 0.0f);
+        barrier(CLK_GLOBAL_MEM_FENCE);
+        FFT_Q_FORWARD
+        FFT_V_FORWARD
+
+        // Apply the thermostat.
+
+        if (indexInBlock == 0) {
+            qreal[0].xyz += vreal[0].xyz*dt;
+            qimag[0].xyz += vimag[0].xyz*dt;
+        }
+        else {
+            const float wk = twown*sin(indexInBlock*M_PI/NUM_COPIES);
+            const float wt = wk*dt;
+            const float coswt = cos(wt);
+            const float sinwt = sin(wt);
+            const float wm = wk/particleVelm.w;
+            const float4 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt
+            const float4 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt);
+            qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt
+            qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt;
+            vreal[indexInBlock] = vprimereal;
+            vimag[indexInBlock] = vprimeimag;
+        }
+        barrier(CLK_GLOBAL_MEM_FENCE);
+        
+        // Inverse FFT.
+        
+        FFT_Q_BACKWARD
+        FFT_V_BACKWARD
+        posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*qreal[indexInBlock].xyz;
+        velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz;
+    }
+}
+
+/**
+ * Advance the velocities by a half step.
+ */
+__kernel void advanceVelocities(__global float4* velm, __global float4* force, float dt) {
+    const int numBlocks = get_global_size(0)/NUM_COPIES;
+    const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES);
+    const int indexInBlock = get_local_id(0)-blockStart;
+
+    // Update velocities.
+    
+    for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
+        int index = particle+indexInBlock*PADDED_NUM_ATOMS;
+        float4 particleVelm = velm[index];
+        particleVelm.xyz += force[index].xyz*(0.5f*dt*particleVelm.w);
+        velm[index] = particleVelm;
+    }
+}
--- a/plugins/rpmd/platforms/opencl/tests/CMakeLists.txt
+++ b/plugins/rpmd/platforms/opencl/tests/CMakeLists.txt
+#
+# Testing
+#
+
+ENABLE_TESTING()
+
+INCLUDE_DIRECTORIES(${OPENCL_INCLUDE_DIR})
+
+# Automatically create tests using files named "Test*.cpp"
+FILE(GLOB TEST_PROGS "*Test*.cpp")
+FOREACH(TEST_PROG ${TEST_PROGS})
+    GET_FILENAME_COMPONENT(TEST_ROOT ${TEST_PROG} NAME_WE)
+
+    # Link with shared library
+    ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
+    TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
+    ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
+
+ENDFOREACH(TEST_PROG ${TEST_PROGS})