Merge https://github.com/openmm/openmm

5a06df78 · tic20 · 8dd60914 · a9223eea · 5a06df78 · 5a06df78
Commit 5a06df78 authored Mar 04, 2020 by tic20
20 changed files
--- a/platforms/common/include/openmm/common/ComputeEvent.h
+++ b/platforms/common/include/openmm/common/ComputeEvent.h
+#ifndef OPENMM_COMPUTEEVENT_H_
+#define OPENMM_COMPUTEEVENT_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/windowsExportCommon.h"
+#include <memory>
+
+namespace OpenMM {
+
+/**
+ * This abstract class represents an event for synchronization between the host and
+ * device.  It is created by calling createEvent() on a ComputeContext, which returns
+ * an instance of a platform-specific subclass.  To use it, call enqueue() immediately
+ * after starting an asynchronous operation, such as a kernel invocation or non-blocking
+ * data transfer.  Then at a later point call wait().  This will cause the host to block
+ * until all operations started before the call to enqueue() have completed.
+ * 
+ * Instead of referring to this class directly, it is best to use a ComputeEvent, which is
+ * a typedef for a shared_ptr to a ComputeEventImpl.  This allows you to treat it as having
+ * value semantics, and frees you from having to manage memory.  
+ */
+
+class OPENMM_EXPORT_COMMON ComputeEventImpl {
+public:
+    /**
+     * The destructor is virtual so that platform-specific subclasses are destroyed
+     * correctly when released through a pointer to this base class.
+     */
+    virtual ~ComputeEventImpl() = default;
+    /**
+     * Insert this event into the device's execution queue.  A later call to wait()
+     * synchronizes with the point at which the event was enqueued.
+     */
+    virtual void enqueue() = 0;
+    /**
+     * Block the caller until every operation that was started before enqueue() was
+     * called has completed.
+     */
+    virtual void wait() = 0;
+};
+
+/**
+ * Shared pointer through which ComputeEventImpl objects are normally used.
+ */
+typedef std::shared_ptr<ComputeEventImpl> ComputeEvent;
+
+} // namespace OpenMM
+
+#endif /*OPENMM_COMPUTEEVENT_H_*/
--- a/platforms/common/include/openmm/common/ComputeForceInfo.h
+++ b/platforms/common/include/openmm/common/ComputeForceInfo.h
+#ifndef OPENMM_COMPUTEFORCEINFO_H_
+#define OPENMM_COMPUTEFORCEINFO_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/windowsExportCommon.h"
+#include <vector>
+
+namespace OpenMM {
+
+/**
+ * ComputeForceInfo objects describe information about the behavior and requirements of
+ * a force.  They exist primarily to help a ComputeContext determine how particles can be
+ * reordered without affecting forces.  Force kernels create them during initialization
+ * and add them to the ComputeContext by calling addForce().
+ */
+
+class OPENMM_EXPORT_COMMON ComputeForceInfo {
+public:
+    ComputeForceInfo() {
+    }
+    /**
+     * This class is designed to be subclassed (every query below is virtual), so
+     * instances may be destroyed through a ComputeForceInfo pointer.  The destructor
+     * must be virtual for that to run the subclass destructor.
+     */
+    virtual ~ComputeForceInfo() {
+    }
+    /**
+     * Get whether or not two particles have identical force field parameters.
+     *
+     * @param particle1    the first particle to compare
+     * @param particle2    the second particle to compare
+     * @return true if the two particles have identical force field parameters
+     */
+    virtual bool areParticlesIdentical(int particle1, int particle2);
+    /**
+     * Get the number of particle groups defined by this force.
+     */
+    virtual int getNumParticleGroups();
+    /**
+     * Get the list of particles in a particular group.
+     *
+     * @param index        the group to query
+     * @param particles    the vector in which the particles of the group are returned
+     */
+    virtual void getParticlesInGroup(int index, std::vector<int>& particles);
+    /**
+     * Get whether two particle groups are identical.
+     *
+     * @param group1    the first group to compare
+     * @param group2    the second group to compare
+     * @return true if the two groups are identical
+     */
+    virtual bool areGroupsIdentical(int group1, int group2);
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_COMPUTEFORCEINFO_H_*/
--- a/platforms/common/include/openmm/common/ComputeKernel.h
+++ b/platforms/common/include/openmm/common/ComputeKernel.h
+#ifndef OPENMM_COMPUTEKERNEL_H_
+#define OPENMM_COMPUTEKERNEL_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ArrayInterface.h"
+#include <memory>
+#include <string>
+#include <type_traits>
+
+namespace OpenMM {
+
+/**
+ * This abstract class represents a kernel that can be executed on a computing device.
+ * Call createKernel() on a ComputeProgramImpl to create an instance of a platform-specific
+ * subclass.  Then call addArg() to specify the values to pass for all of the kernel's arguments.
+ * Finally, call execute() to execute the kernel.  If you need to modify the values of kernel
+ * arguments between invocations, use setArg() to change the value of an argument.
+ * 
+ * Instead of referring to this class directly, it is best to use ComputeKernel, which is
+ * a typedef for a shared_ptr to a ComputeKernelImpl.  This allows you to treat it as having
+ * value semantics, and frees you from having to manage memory.  
+ */
+
+class OPENMM_EXPORT_COMMON ComputeKernelImpl {
+public:
+    virtual ~ComputeKernelImpl() {
+    }
+    /**
+     * Get the name of this kernel.
+     */
+    virtual std::string getName() const = 0;
+    /**
+     * Add an argument to pass to the kernel when it is invoked.  This overload only
+     * participates for trivially copyable types.  The address and size of the value
+     * are forwarded to addPrimitiveArg().
+     * 
+     * NOTE(review): presumably implementations copy the bytes before returning, since
+     * value may refer to a temporary -- confirm against the platform subclasses.
+     * 
+     * @param value     the value to pass to the kernel
+     */
+    template <class T>
+    typename std::enable_if<std::is_trivially_copyable<T>::value, void>::type addArg(const T& value) {
+        addPrimitiveArg(&value, sizeof(value));
+    }
+    /**
+     * Add an array argument to pass to the kernel when it is invoked.  The call is
+     * forwarded to addArrayArg().
+     * 
+     * @param value     the array to pass to the kernel
+     */
+    void addArg(ArrayInterface& value) {
+        addArrayArg(value);
+    }
+    /**
+     * Add a placeholder for an argument without specifying its value.  The value must
+     * be provided by calling setArg() before the kernel is executed.
+     */
+    void addArg() {
+        addEmptyArg();
+    }
+    /**
+     * Set the value of an argument to pass to the kernel when it is invoked.  This
+     * overload only participates for trivially copyable types.  The address and size
+     * of the value are forwarded to setPrimitiveArg().
+     * 
+     * @param index     the index of the argument to set
+     * @param value     the value to pass to the kernel
+     */
+    template <class T>
+    typename std::enable_if<std::is_trivially_copyable<T>::value, void>::type setArg(int index, const T& value) {
+        setPrimitiveArg(index, &value, sizeof(value));
+    }
+    /**
+     * Set an array as the value of an argument to pass to the kernel when it is
+     * invoked.  The call is forwarded to setArrayArg().
+     * 
+     * @param index     the index of the argument to set
+     * @param value     the array to pass to the kernel
+     */
+    void setArg(int index, ArrayInterface& value) {
+        setArrayArg(index, value);
+    }
+    /**
+     * Execute this kernel.
+     *
+     * @param threads      the maximum number of threads that should be used.  Depending on the
+     *                     computing device, it may choose to use fewer threads than this number.
+     * @param blockSize    the number of threads in each thread block.  If this is omitted, a
+     *                     default size that is appropriate for the computing device is used.
+     */
+    virtual void execute(int threads, int blockSize=-1) = 0;
+protected:
+    /**
+     * Add an argument to pass to the kernel when it is invoked, where the value is a
+     * subclass of ArrayInterface.  Called by addArg(ArrayInterface&).
+     * 
+     * @param value     the value to pass to the kernel
+     */
+    virtual void addArrayArg(ArrayInterface& value) = 0;
+    /**
+     * Add an argument to pass to the kernel when it is invoked, where the value is a
+     * primitive type.  Called by the templated addArg().
+     * 
+     * @param value    a pointer to the argument value
+     * @param size     the size of the value in bytes
+     */
+    virtual void addPrimitiveArg(const void* value, int size) = 0;
+    /**
+     * Add a placeholder for an argument without specifying its value.  Called by
+     * the addArg() overload that takes no parameters.
+     */
+    virtual void addEmptyArg() = 0;
+    /**
+     * Set the value of an existing argument, where the value is a subclass of
+     * ArrayInterface.  Called by setArg(int, ArrayInterface&).
+     * 
+     * @param index     the index of the argument to set
+     * @param value     the value to pass to the kernel
+     */
+    virtual void setArrayArg(int index, ArrayInterface& value) = 0;
+    /**
+     * Set the value of an existing argument, where the value is a primitive type.
+     * Called by the templated setArg().
+     * 
+     * @param index    the index of the argument to set
+     * @param value    a pointer to the argument value
+     * @param size     the size of the value in bytes
+     */
+    virtual void setPrimitiveArg(int index, const void* value, int size) = 0;
+};
+
+/**
+ * Shared pointer through which ComputeKernelImpl objects are normally used.
+ */
+typedef std::shared_ptr<ComputeKernelImpl> ComputeKernel;
+
+} // namespace OpenMM
+
+#endif /*OPENMM_COMPUTEKERNEL_H_*/
--- a/platforms/common/include/openmm/common/ComputeParameterInfo.h
+++ b/platforms/common/include/openmm/common/ComputeParameterInfo.h
+#ifndef OPENMM_COMPUTEPARAMETERINFO_H_
+#define OPENMM_COMPUTEPARAMETERINFO_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ArrayInterface.h"
+#include <sstream>
+#include <string>
+
+namespace OpenMM {
+
+/**
+ * This class stores information about a parameter that can be passed to a kernel.
+ * It combines an ArrayInterface holding parameter values with additional information
+ * describing how to represent it in kernels: the variable name, the data type, etc.
+ * 
+ * The array is assumed to contain a parameter value for each of many objects (atoms,
+ * bonds, etc.).  Each value may in turn be a multi-component vector.  When creating
+ * a ComputeParameterInfo, specify the number of components in the vector and the
+ * type of each component.  For example, suppose you have an array of type float3
+ * containing a dipole moment for each atom.  The ComputeParameterInfo would be
+ * created like this:
+ * 
+ * ComputeParameterInfo parameter(dipoleArray, "dipole", "float", 3);
+ */
+
+class ComputeParameterInfo {
+public:
+    /**
+     * Construct the description of a single kernel parameter.
+     *
+     * @param array          the array holding the parameter values.  Only a reference to it
+     *                       is stored, so the array must outlive this object.
+     * @param name           the name of the variable to use for this parameter
+     * @param componentType  the data type of each component of a value (for example "float")
+     * @param numComponents  the number of components in each value
+     * @param constant       whether the array memory should be marked as constant
+     */
+    ComputeParameterInfo(ArrayInterface& array, const std::string& name, const std::string& componentType, int numComponents, bool constant=true) :
+            array(array), name(name), componentType(componentType),
+            type(buildTypeName(componentType, numComponents)),
+            numComponents(numComponents), constant(constant) {
+    }
+    virtual ~ComputeParameterInfo() {
+    }
+    /**
+     * Access the array that holds the parameter values.
+     */
+    ArrayInterface& getArray() {
+        return array;
+    }
+    /**
+     * Access the array that holds the parameter values (read-only overload).
+     */
+    const ArrayInterface& getArray() const {
+        return array;
+    }
+    /**
+     * Get the variable name that was given for this parameter.
+     */
+    const std::string& getName() const {
+        return name;
+    }
+    /**
+     * Get the data type of a single component of each value.  For example, if
+     * getType() returns "float3", this returns "float".
+     */
+    const std::string& getComponentType() const {
+        return componentType;
+    }
+    /**
+     * Get the data type of a whole value.  This is the component type, followed by
+     * the number of components when that number is not 1.
+     */
+    const std::string& getType() const {
+        return type;
+    }
+    /**
+     * Get how many components each value has.  For values that are not a vector
+     * type this is 1.
+     */
+    int getNumComponents() const {
+        return numComponents;
+    }
+    /**
+     * Get the size in bytes of each parameter value, as reported by the array's
+     * getElementSize().
+     */
+    int getSize() const {
+        return array.getElementSize();
+    }
+    /**
+     * Get whether the array memory should be marked as constant.
+     */
+    bool isConstant() const {
+        return constant;
+    }
+private:
+    /**
+     * Build the name of the full value type: the component type alone for a single
+     * component, otherwise the component type with the component count appended.
+     */
+    static std::string buildTypeName(const std::string& componentType, int numComponents) {
+        if (numComponents == 1)
+            return componentType;
+        std::stringstream joined;
+        joined << componentType << numComponents;
+        return joined.str();
+    }
+    ArrayInterface& array;
+    std::string name;
+    std::string componentType;
+    std::string type;
+    int numComponents;
+    bool constant;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_COMPUTEPARAMETERINFO_H_*/
--- a/platforms/common/include/openmm/common/ComputeParameterSet.h
+++ b/platforms/common/include/openmm/common/ComputeParameterSet.h
+#ifndef OPENMM_COMPUTEPARAMETERSET_H_
+#define OPENMM_COMPUTEPARAMETERSET_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ArrayInterface.h"
+#include "openmm/common/ComputeContext.h"
+#include "openmm/common/ComputeParameterInfo.h"
+#include <string>
+#include <vector>
+
+namespace OpenMM {
+
+/**
+ * This class represents a set of floating point parameter values for a set of objects (particles, bonds, etc.).
+ * It automatically creates an appropriate set of arrays to hold the parameter values, based
+ * on the number of parameters required.
+ */
+
+// NOTE(review): this class stores raw ArrayInterface pointers and declares a destructor
+// but no copy constructor.  If the destructor releases the arrays (its definition is not
+// visible in this header), copying an instance would be unsafe -- confirm, and consider
+// making the class explicitly non-copyable.
+class OPENMM_EXPORT_COMMON ComputeParameterSet {
+public:
+    /**
+     * Create a ComputeParameterSet.
+     *
+     * @param context          the context for which to create the parameter set
+     * @param numParameters    the number of parameters for each object
+     * @param numObjects       the number of objects to store parameter values for
+     * @param name             the name of the parameter set
+     * @param arrayPerParameter   if true, a separate array is created for each parameter.  If false,
+     *                            multiple parameters may be combined into a single array for efficiency.
+     * @param useDoublePrecision  whether values should be stored in double precision rather than
+     *                            single precision
+     */
+    ComputeParameterSet(ComputeContext& context, int numParameters, int numObjects, const std::string& name, bool arrayPerParameter=false, bool useDoublePrecision=false);
+    ~ComputeParameterSet();
+    /**
+     * Get the number of parameters.
+     */
+    int getNumParameters() const {
+        return numParameters;
+    }
+    /**
+     * Get the number of objects.
+     */
+    int getNumObjects() const {
+        return numObjects;
+    }
+    /**
+     * Get the values of all parameters.  This template is only declared here; its
+     * definition lives outside this header.
+     *
+     * @param values on exit, values[i][j] contains the value of parameter j for object i
+     */
+    template <class T>
+    void getParameterValues(std::vector<std::vector<T> >& values);
+    /**
+     * Set the values of all parameters.  This template is only declared here; its
+     * definition lives outside this header.
+     *
+     * @param values values[i][j] contains the value of parameter j for object i
+     */
+    template <class T>
+    void setParameterValues(const std::vector<std::vector<T> >& values);
+    /**
+     * Get a vector of ComputeParameterInfo objects which describe the arrays
+     * containing the data.  The returned reference refers to a member of this
+     * object, so it is only valid while this object exists.
+     */
+    std::vector<ComputeParameterInfo>& getParameterInfos() {
+        return parameters;
+    }
+    /**
+     * Get a suffix to add to variable names when accessing a certain parameter.
+     *
+     * @param index         the index of the parameter
+     * @param extraSuffix   an extra suffix to add to the variable name
+     * @return the suffix to append
+     */
+    std::string getParameterSuffix(int index, const std::string& extraSuffix="") const;
+private:
+    ComputeContext& context;
+    int numParameters, numObjects, elementSize;
+    std::string name;
+    // Raw pointers to the arrays holding the values.  NOTE(review): presumably owned
+    // by this object and released in the destructor -- confirm in the implementation.
+    std::vector<ArrayInterface*> arrays;
+    // Descriptions of the arrays, as returned by getParameterInfos().
+    std::vector<ComputeParameterInfo> parameters;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_COMPUTEPARAMETERSET_H_*/
--- a/platforms/common/include/openmm/common/ComputeProgram.h
+++ b/platforms/common/include/openmm/common/ComputeProgram.h
+#ifndef OPENMM_COMPUTEPROGRAM_H_
+#define OPENMM_COMPUTEPROGRAM_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ComputeKernel.h"
+#include <memory>
+
+namespace OpenMM {
+
+/**
+ * This abstract class represents a compiled program that can be executed on a computing
+ * device.  A ComputeProgramImpl is created by calling compileProgram() on a ComputeContext,
+ * which returns an instance of a platform-specific subclass.  The source code for a
+ * ComputeProgramImpl typically contains one or more kernels.  Call createKernel() to get
+ * ComputeKernels for the kernels, which can then be executed.
+ * 
+ * Instead of referring to this class directly, it is best to use ComputeProgram, which is
+ * a typedef for a shared_ptr to a ComputeProgramImpl.  This allows you to treat it as having
+ * value semantics, and frees you from having to manage memory.  
+ */
+
+class OPENMM_EXPORT_COMMON ComputeProgramImpl {
+public:
+    /**
+     * The destructor is virtual so that platform-specific subclasses are destroyed
+     * correctly when released through a pointer to this base class.
+     */
+    virtual ~ComputeProgramImpl() = default;
+    /**
+     * Obtain a ComputeKernel for one of the kernels contained in this program.
+     * 
+     * @param name    the name of the kernel to get
+     * @return a ComputeKernel for the requested kernel
+     */
+    virtual ComputeKernel createKernel(const std::string& name) = 0;
+};
+
+/**
+ * Shared pointer through which ComputeProgramImpl objects are normally used.
+ */
+typedef std::shared_ptr<ComputeProgramImpl> ComputeProgram;
+
+} // namespace OpenMM
+
+#endif /*OPENMM_COMPUTEPROGRAM_H_*/
--- a/platforms/common/include/openmm/common/ComputeVectorTypes.h
+++ b/platforms/common/include/openmm/common/ComputeVectorTypes.h
+#ifndef OPENMM_COMPUTEVECTORTYPES_H_
+#define OPENMM_COMPUTEVECTORTYPES_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+namespace OpenMM {
+
+/**
+ * A vector of two shorts.
+ */
+struct mm_short2 {
+    short x;
+    short y;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_short2() {}
+    /**
+     * Create a vector from its two components.
+     */
+    mm_short2(short x, short y) : x(x), y(y) {}
+};
+/**
+ * A vector of three shorts.  It is stored with a fourth element, w, so it occupies
+ * the same amount of memory as an mm_short4.
+ */
+struct mm_short3 {
+    short x, y, z, w;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_short3() {
+    }
+    /**
+     * Create a vector from its three components.  The unused w element is set to
+     * zero so the object never carries an indeterminate value when it is copied.
+     */
+    mm_short3(short x, short y, short z) : x(x), y(y), z(z), w(0) {
+    }
+};
+/**
+ * A vector of four shorts.
+ */
+struct mm_short4 {
+    short x;
+    short y;
+    short z;
+    short w;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_short4() {}
+    /**
+     * Create a vector from its four components.
+     */
+    mm_short4(short x, short y, short z, short w) : x(x), y(y), z(z), w(w) {}
+};
+/**
+ * A vector of two ints.
+ */
+struct mm_int2 {
+    int x;
+    int y;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_int2() {}
+    /**
+     * Create a vector from its two components.
+     */
+    mm_int2(int x, int y) : x(x), y(y) {}
+};
+/**
+ * A vector of three ints.  It is stored with a fourth element, w, so it occupies
+ * the same amount of memory as an mm_int4.
+ */
+struct mm_int3 {
+    int x, y, z, w;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_int3() {
+    }
+    /**
+     * Create a vector from its three components.  The unused w element is set to
+     * zero so the object never carries an indeterminate value when it is copied.
+     */
+    mm_int3(int x, int y, int z) : x(x), y(y), z(z), w(0) {
+    }
+};
+/**
+ * A vector of four ints.
+ */
+struct mm_int4 {
+    int x;
+    int y;
+    int z;
+    int w;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_int4() {}
+    /**
+     * Create a vector from its four components.
+     */
+    mm_int4(int x, int y, int z, int w) : x(x), y(y), z(z), w(w) {}
+};
+/**
+ * A vector of two floats.
+ */
+struct mm_float2 {
+    float x;
+    float y;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_float2() {}
+    /**
+     * Create a vector from its two components.
+     */
+    mm_float2(float x, float y) : x(x), y(y) {}
+};
+/**
+ * A vector of three floats.  It is stored with a fourth element, w, so it occupies
+ * the same amount of memory as an mm_float4.
+ */
+struct mm_float3 {
+    float x, y, z, w;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_float3() {
+    }
+    /**
+     * Create a vector from its three components.  The unused w element is set to
+     * zero so the object never carries an indeterminate value when it is copied.
+     */
+    mm_float3(float x, float y, float z) : x(x), y(y), z(z), w(0) {
+    }
+};
+/**
+ * A vector of four floats.
+ */
+struct mm_float4 {
+    float x;
+    float y;
+    float z;
+    float w;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_float4() {}
+    /**
+     * Create a vector from its four components.
+     */
+    mm_float4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
+};
+/**
+ * A vector of two doubles.
+ */
+struct mm_double2 {
+    double x;
+    double y;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_double2() {}
+    /**
+     * Create a vector from its two components.
+     */
+    mm_double2(double x, double y) : x(x), y(y) {}
+};
+/**
+ * A vector of three doubles.  It is stored with a fourth element, w, so it occupies
+ * the same amount of memory as an mm_double4.
+ */
+struct mm_double3 {
+    double x, y, z, w;
+    /**
+     * Create a vector without initializing its elements.
+     */
+    mm_double3() {
+    }
+    /**
+     * Create a vector from its three components.  The unused w element is set to
+     * zero so the object never carries an indeterminate value when it is copied.
+     */
+    mm_double3(double x, double y, double z) : x(x), y(y), z(z), w(0) {
+    }
+};
+/**
+ * A vector of four doubles.  The default constructor leaves all components
+ * uninitialized.
+ */
+struct mm_double4 {
+    double x;
+    double y;
+    double z;
+    double w;
+    mm_double4() {}
+    mm_double4(double xValue, double yValue, double zValue, double wValue)
+        : x(xValue), y(yValue), z(zValue), w(wValue) {}
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_COMPUTEVECTORTYPES_H_*/
--- a/platforms/common/include/openmm/common/ExpressionUtilities.h
+++ b/platforms/common/include/openmm/common/ExpressionUtilities.h
+#ifndef OPENMM_EXPRESSIONUTILITIES_H_
+#define OPENMM_EXPRESSIONUTILITIES_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "ComputeContext.h"
+#include "openmm/TabulatedFunction.h"
+#include "lepton/CustomFunction.h"
+#include "lepton/ExpressionTreeNode.h"
+#include "lepton/ParsedExpression.h"
+#include <map>
+#include <sstream>
+#include <string>
+#include <utility>
+
+namespace OpenMM {
+
+/**
+ * This class is used by various classes to generate kernel source code implementing
+ * user defined mathematical expressions.
+ */
+
+class OPENMM_EXPORT_COMMON ExpressionUtilities {
+public:
+    /**
+     * Create an ExpressionUtilities object.
+     *
+     * @param context    the ComputeContext this object works with.  The class holds a ComputeContext
+     *                   reference member, so presumably the context must outlive this object (TODO confirm
+     *                   against the implementation).
+     */
+    ExpressionUtilities(ComputeContext& context);
+    /**
+     * Generate the source code for calculating a set of expressions.
+     *
+     * @param expressions    the expressions to generate code for (keys are the variables to store the output values in)
+     * @param variables      defines the source code to generate for each variable that may appear in the expressions.  Keys are
+     *                       variable names, and the values are the code to generate for them.
+     * @param functions      the tabulated functions that may appear in the expressions
+     * @param functionNames  defines the variable name for each tabulated function that may appear in the expressions
+     * @param prefix         a prefix to put in front of temporary variables
+     * @param tempType       the type of value to use for temporary variables (defaults to "real")
+     * @return the generated source code
+     */
+    std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables,
+            const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames,
+            const std::string& prefix, const std::string& tempType="real");
+    /**
+     * Generate the source code for calculating a set of expressions.
+     *
+     * @param expressions    the expressions to generate code for (keys are the variables to store the output values in)
+     * @param variables      defines the source code to generate for each variable or precomputed sub-expression that may appear in the expressions.
+     *                       Each entry is an ExpressionTreeNode, and the code to generate wherever an identical node appears.
+     * @param functions      the tabulated functions that may appear in the expressions
+     * @param functionNames  defines the variable name for each tabulated function that may appear in the expressions
+     * @param prefix         a prefix to put in front of temporary variables
+     * @param tempType       the type of value to use for temporary variables (defaults to "real")
+     * @return the generated source code
+     */
+    std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables,
+            const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames,
+            const std::string& prefix, const std::string& tempType="real");
+    /**
+     * Calculate the spline coefficients for a tabulated function that appears in expressions.
+     *
+     * @param function   the function for which to compute coefficients
+     * @param width      on output, the number of floats used for each value
+     * @return the spline coefficients
+     */
+    std::vector<float> computeFunctionCoefficients(const TabulatedFunction& function, int& width);
+    /**
+     * Get a Lepton::CustomFunction that can be used to represent a TabulatedFunction when parsing expressions.
+     *
+     * @param function   the function for which to get a placeholder
+     * @return a pointer to the placeholder.  NOTE(review): ownership of the returned pointer is not
+     *         visible in this header; confirm against the implementation before deleting it.
+     */
+    Lepton::CustomFunction* getFunctionPlaceholder(const TabulatedFunction& function);
+    /**
+     * Get a Lepton::CustomFunction that can be used to represent the periodicdistance() function when parsing expressions.
+     *
+     * @return a pointer to the placeholder.  NOTE(review): ownership of the returned pointer is not
+     *         visible in this header; confirm against the implementation before deleting it.
+     */
+    Lepton::CustomFunction* getPeriodicDistancePlaceholder();
+private:
+    /**
+     * A CustomFunction that only records how many arguments it takes.  It cannot be used to
+     * compute anything: evaluate() and evaluateDerivative() ignore their arguments and always
+     * return 0.
+     */
+    class FunctionPlaceholder : public Lepton::CustomFunction {
+        public:
+            /**
+             * @param numArgs    the value that getNumArguments() will report
+             */
+            FunctionPlaceholder(int numArgs) : numArgs(numArgs) {
+            }
+            int getNumArguments() const {
+                return numArgs;
+            }
+            // Always 0; the arguments are not inspected.
+            double evaluate(const double* arguments) const {
+                return 0.0;
+            }
+            // Always 0; neither the arguments nor the derivative order are inspected.
+            double evaluateDerivative(const double* arguments, const int* derivOrder) const {
+                return 0.0;
+            }
+            // Returns a newly allocated placeholder with the same argument count.
+            CustomFunction* clone() const {
+                return new FunctionPlaceholder(numArgs);
+            }
+        private:
+            int numArgs;
+    };
+    // Private helpers.  Their bodies live in the implementation file; see it for their exact contracts.
+    void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
+            std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps,
+            const std::vector<const TabulatedFunction*>& functions, const std::vector<std::pair<std::string, std::string> >& functionNames,
+            const std::string& prefix, const std::vector<std::vector<double> >& functionParams, const std::vector<Lepton::ParsedExpression>& allExpressions, const std::string& tempType);
+    std::string getTempName(const Lepton::ExpressionTreeNode& node, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps);
+    void findRelatedCustomFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
+            std::vector<const Lepton::ExpressionTreeNode*>& nodes);
+    void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
+            std::map<int, const Lepton::ExpressionTreeNode*>& powers);
+    void callFunction(std::stringstream& out, std::string singleFn, std::string doubleFn, const std::string& arg, const std::string& tempType);
+    void callFunction2(std::stringstream& out, std::string singleFn, std::string doubleFn, const std::string& arg1, const std::string& arg2, const std::string& tempType);
+    std::vector<std::vector<double> > computeFunctionParameters(const std::vector<const TabulatedFunction*>& functions);
+    ComputeContext& context;
+    // Placeholder instances owned by this object.  Presumably these are what getFunctionPlaceholder()
+    // and getPeriodicDistancePlaceholder() hand out (TODO confirm against the implementation).
+    FunctionPlaceholder fp1, fp2, fp3, periodicDistance;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_EXPRESSIONUTILITIES_H_*/
--- a/platforms/common/include/openmm/common/IntegrationUtilities.h
+++ b/platforms/common/include/openmm/common/IntegrationUtilities.h
+#ifndef OPENMM_INTEGRATIONUTILITIES_H_
+#define OPENMM_INTEGRATIONUTILITIES_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ComputeArray.h"
+#include "openmm/common/ComputeKernel.h"
+#include "openmm/common/ComputeVectorTypes.h"
+#include "openmm/System.h"
+#include <iosfwd>
+#include <map>
+
+namespace OpenMM {
+
+class ComputeContext;
+
+/**
+ * This class implements features that are used by many different integrators, including
+ * common workspace arrays, random number generation, and enforcing constraints.
+ */
+
+class OPENMM_EXPORT_COMMON IntegrationUtilities {
+public:
+    /**
+     * Create an IntegrationUtilities object.
+     *
+     * @param context    the ComputeContext this object works with.  The class holds a ComputeContext
+     *                   reference member, so presumably the context must outlive this object (TODO confirm).
+     * @param system     the System being simulated
+     */
+    IntegrationUtilities(ComputeContext& context, const System& system);
+    // Virtual so that platform-specific subclasses can be destroyed through a base pointer.
+    virtual ~IntegrationUtilities() {
+    }
+    /**
+     * Get the array which contains position deltas.  These are the amounts by
+     * which the position of each atom will change in the current step.  The actual
+     * positions should not be modified until after constraints have been applied.
+     */
+    virtual ArrayInterface& getPosDelta() = 0;
+    /**
+     * Get the array which contains random values.  Each element is a float4 whose components
+     * are independent, normally distributed random numbers with mean 0 and variance 1.
+     * Be sure to call initRandomNumberGenerator() and prepareRandomNumbers() before
+     * accessing this array.
+     */
+    virtual ArrayInterface& getRandom() = 0;
+    /**
+     * Get the array which contains the current step size.
+     */
+    virtual ArrayInterface& getStepSize() = 0;
+    /**
+     * Set the size to use for the next step.
+     *
+     * @param size    the step size to use
+     */
+    void setNextStepSize(double size);
+    /**
+     * Get the size that was used for the last step.
+     */
+    double getLastStepSize();
+    /**
+     * Apply constraints to the atom positions.  When calling this method, the
+     * context's array of positions should contain the positions at the start of the
+     * step, and the array returned by getPosDelta() should contain the intended
+     * change to each position.  This method modifies the position deltas so that,
+     * once they are added to the positions, constraints will be satisfied.
+     *
+     * @param tol             the constraint tolerance
+     */
+    void applyConstraints(double tol);
+    /**
+     * Apply constraints to the atom velocities.
+     *
+     * @param tol             the constraint tolerance
+     */
+    void applyVelocityConstraints(double tol);
+    /**
+     * Initialize the random number generator.  This should be called once when the
+     * context is first created.  Subsequent calls will be ignored if the random
+     * seed is the same as on the first call, or throw an exception if the random
+     * seed is different.
+     *
+     * @param randomNumberSeed    the seed to initialize the generator with
+     */
+    void initRandomNumberGenerator(unsigned int randomNumberSeed);
+    /**
+     * Ensure that sufficient random numbers are available in the array, and generate new ones if not.
+     *
+     * @param numValues     the number of random float4's that will be required
+     * @return the index in the array at which to start reading
+     */
+    int prepareRandomNumbers(int numValues);
+    /**
+     * Compute the positions of virtual sites.
+     */
+    void computeVirtualSites();
+    /**
+     * Distribute forces from virtual sites to the atoms they are based on.
+     */
+    virtual void distributeForcesFromVirtualSites() = 0;
+    /**
+     * Create a checkpoint recording the current state of the random number generator.
+     *
+     * @param stream    an output stream the checkpoint data should be written to
+     */
+    void createCheckpoint(std::ostream& stream);
+    /**
+     * Load a checkpoint that was written by createCheckpoint().
+     *
+     * @param stream    an input stream the checkpoint data should be read from
+     */
+    void loadCheckpoint(std::istream& stream);
+    /**
+     * Compute the kinetic energy of the system, possibly shifting the velocities in time to account
+     * for a leapfrog integrator.
+     *
+     * @param timeShift   the amount by which to shift the velocities in time
+     * @return the kinetic energy
+     */
+    double computeKineticEnergy(double timeShift);
+    /**
+     * Get the data structure that holds the state of all Nose-Hoover chains.  The map is
+     * returned by mutable reference, so callers can add, modify, or remove entries.
+     */
+    std::map<int, ComputeArray>& getNoseHooverChainState() {
+        return noseHooverChainState;
+    }
+protected:
+    /**
+     * Platform-specific constraint implementation, to be provided by subclasses.  Presumably
+     * applyConstraints() and applyVelocityConstraints() forward to this with constrainVelocities
+     * set to false and true respectively (TODO confirm against the implementation).
+     *
+     * @param constrainVelocities    whether velocities, rather than positions, are being constrained
+     * @param tol                    the constraint tolerance
+     */
+    virtual void applyConstraintsImpl(bool constrainVelocities, double tol) = 0;
+    ComputeContext& context;
+    // Compute kernels used by this class.
+    ComputeKernel settlePosKernel, settleVelKernel;
+    ComputeKernel shakePosKernel, shakeVelKernel;
+    ComputeKernel ccmaDirectionsKernel, ccmaPosForceKernel, ccmaVelForceKernel;
+    ComputeKernel ccmaMultiplyKernel, ccmaUpdateKernel;
+    ComputeKernel vsitePositionKernel, vsiteForceKernel, vsiteSaveForcesKernel;
+    ComputeKernel randomKernel, timeShiftKernel;
+    // Workspace arrays.  NOTE(review): presumably posDelta, random and stepSize back the
+    // getPosDelta(), getRandom() and getStepSize() accessors in subclasses; confirm.
+    ComputeArray posDelta;
+    ComputeArray settleAtoms;
+    ComputeArray settleParams;
+    ComputeArray shakeAtoms;
+    ComputeArray shakeParams;
+    ComputeArray random;
+    ComputeArray randomSeed;
+    ComputeArray stepSize;
+    ComputeArray ccmaAtoms;
+    ComputeArray ccmaDistance;
+    ComputeArray ccmaReducedMass;
+    ComputeArray ccmaAtomConstraints;
+    ComputeArray ccmaNumAtomConstraints;
+    ComputeArray ccmaConstraintMatrixColumn;
+    ComputeArray ccmaConstraintMatrixValue;
+    ComputeArray ccmaDelta1;
+    ComputeArray ccmaDelta2;
+    ComputeArray ccmaConverged;
+    ComputeArray vsite2AvgAtoms;
+    ComputeArray vsite2AvgWeights;
+    ComputeArray vsite3AvgAtoms;
+    ComputeArray vsite3AvgWeights;
+    ComputeArray vsiteOutOfPlaneAtoms;
+    ComputeArray vsiteOutOfPlaneWeights;
+    ComputeArray vsiteLocalCoordsIndex;
+    ComputeArray vsiteLocalCoordsAtoms;
+    ComputeArray vsiteLocalCoordsWeights;
+    ComputeArray vsiteLocalCoordsPos;
+    ComputeArray vsiteLocalCoordsStartIndex;
+    // Returned by getNoseHooverChainState().
+    std::map<int, ComputeArray> noseHooverChainState;
+    // NOTE(review): lastSeed is a signed int while initRandomNumberGenerator() takes an
+    // unsigned int seed; confirm the comparison in the implementation handles large seeds.
+    int randomPos, lastSeed, numVsites;
+    bool hasOverlappingVsites;
+    mm_double2 lastStepSize;
+    // Helper types that are only declared here; their definitions are not part of this header.
+    struct ShakeCluster;
+    struct ConstraintOrderer;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_INTEGRATIONUTILITIES_H_*/
--- a/platforms/common/include/openmm/common/NonbondedUtilities.h
+++ b/platforms/common/include/openmm/common/NonbondedUtilities.h
+#ifndef OPENMM_NONBONDEDUTILITIES_H_
+#define OPENMM_NONBONDEDUTILITIES_H_
+
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ArrayInterface.h"
+#include "openmm/common/ComputeParameterInfo.h"
+#include <string>
+#include <vector>
+
+namespace OpenMM {
+
+/**
+ * This class provides a generic interface for calculating nonbonded interactions.  Clients only need
+ * to provide the code for evaluating a single interaction and the list of parameters it depends on.
+ * A complete kernel is then synthesized using an appropriate algorithm to evaluate all interactions on
+ * all atoms.  Call addInteraction() to define a nonbonded interaction, and addParameter() to define
+ * per-particle parameters that the interaction depends on.
+ *
+ * During each force or energy evaluation, the following sequence of steps takes place:
+ *
+ * 1. Data structures (e.g. neighbor lists) are calculated to allow nonbonded interactions to be evaluated
+ * quickly.
+ *
+ * 2. calcForcesAndEnergy() is called on each ForceImpl in the System.
+ *
+ * 3. Finally, the default interaction kernel is invoked to calculate all interactions that were added
+ * to it.
+ *
+ * This sequence means that the default interaction kernel may depend on quantities that were calculated
+ * by ForceImpls during calcForcesAndEnergy().
+ */
+
+class OPENMM_EXPORT_COMMON NonbondedUtilities {
+public:
+    // Virtual so that platform-specific implementations can be destroyed through this interface.
+    virtual ~NonbondedUtilities() {
+    }
+    /**
+     * Add a nonbonded interaction to be evaluated by the default interaction kernel.
+     *
+     * @param usesCutoff     specifies whether a cutoff should be applied to this interaction
+     * @param usesPeriodic   specifies whether periodic boundary conditions should be applied to this interaction
+     * @param usesExclusions specifies whether this interaction uses exclusions.  If this is true, it must have identical exclusions to every other interaction.
+     * @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false)
+     * @param exclusionList  for each atom, specifies the list of other atoms whose interactions should be excluded
+     * @param kernel         the code to evaluate the interaction
+     * @param forceGroup     the force group in which the interaction should be calculated
+     */
+    virtual void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup) = 0;
+    /**
+     * Add a per-atom parameter that the default interaction kernel may depend on.
+     *
+     * @param parameter      a description of the parameter to add
+     */
+    virtual void addParameter(ComputeParameterInfo parameter) = 0;
+    /**
+     * Add an array (other than a per-atom parameter) that should be passed as an argument to the default interaction kernel.
+     *
+     * @param parameter      a description of the array to pass
+     */
+    virtual void addArgument(ComputeParameterInfo parameter) = 0;
+    /**
+     * Register that the interaction kernel will be computing the derivative of the potential energy
+     * with respect to a parameter.
+     *
+     * @param param   the name of the parameter
+     * @return the variable that will be used to accumulate the derivative.  Any code you pass to addInteraction() should
+     * add its contributions to this variable.
+     */
+    virtual std::string addEnergyParameterDerivative(const std::string& param) = 0;
+    /**
+     * Get the number of force buffers required for nonbonded forces.
+     */
+    virtual int getNumForceBuffers() const = 0;
+    /**
+     * Get whether a cutoff is being used.
+     */
+    virtual bool getUseCutoff() = 0;
+    /**
+     * Get whether periodic boundary conditions are being used.
+     */
+    virtual bool getUsePeriodic() = 0;
+    /**
+     * Get the number of thread blocks used for computing nonbonded forces.
+     */
+    virtual int getNumForceThreadBlocks() = 0;
+    /**
+     * Get the size of each thread block used for computing nonbonded forces.
+     */
+    virtual int getForceThreadBlockSize() = 0;
+    /**
+     * Get the maximum cutoff distance used by any interaction.
+     */
+    virtual double getMaxCutoffDistance() = 0;
+    /**
+     * Given a nonbonded cutoff, get the padded cutoff distance used in computing
+     * the neighbor list.
+     *
+     * @param cutoff         the nonbonded cutoff distance
+     * @return the padded cutoff distance
+     */
+    virtual double padCutoff(double cutoff) = 0;
+    /**
+     * Get the array containing the center of each atom block.
+     */
+    virtual ArrayInterface& getBlockCenters() = 0;
+    /**
+     * Get the array containing the dimensions of each atom block.
+     */
+    virtual ArrayInterface& getBlockBoundingBoxes() = 0;
+    /**
+     * Get the array whose first element contains the number of tiles with interactions.
+     */
+    virtual ArrayInterface& getInteractionCount() = 0;
+    /**
+     * Get the array containing tiles with interactions.
+     */
+    virtual ArrayInterface& getInteractingTiles() = 0;
+    /**
+     * Get the array containing the atoms in each tile with interactions.
+     */
+    virtual ArrayInterface& getInteractingAtoms() = 0;
+    /**
+     * Get the array containing exclusion flags.
+     */
+    virtual ArrayInterface& getExclusions() = 0;
+    /**
+     * Get the array containing tiles with exclusions.
+     */
+    virtual ArrayInterface& getExclusionTiles() = 0;
+    /**
+     * Get the array containing the index into the exclusion array for each tile.
+     */
+    virtual ArrayInterface& getExclusionIndices() = 0;
+    /**
+     * Get the array listing where the exclusion data starts for each row.
+     */
+    virtual ArrayInterface& getExclusionRowIndices() = 0;
+    /**
+     * Get the array containing a flag for whether the neighbor list was rebuilt
+     * on the most recent call to prepareInteractions().
+     */
+    virtual ArrayInterface& getRebuildNeighborList() = 0;
+};
+
+} // namespace OpenMM
+
+#endif /*OPENMM_NONBONDEDUTILITIES_H_*/
--- a/platforms/opencl/include/windowsExportOpenCL.h
+++ b/platforms/opencl/include/windowsExportOpenCL.h
-#ifndef OPENMM_WINDOWSEXPORTOPENCL_H_
-#define OPENMM_WINDOWSEXPORTOPENCL_H_
-
-/*
- * Shared libraries are messy in Visual Studio. We have to distinguish three
- * cases:
- *   (1) this header is being used to build the OpenMM shared library
- *       (dllexport)
- *   (2) this header is being used by a *client* of the OpenMM shared
- *       library (dllimport)
- *   (3) we are building the OpenMM static library, or the client is
- *       being compiled with the expectation of linking with the
- *       OpenMM static library (nothing special needed)
- * In the CMake script for building this library, we define one of the symbols
- *     OPENMM_OPENCL_BUILDING_{SHARED|STATIC}_LIBRARY
- * Client code normally has no special symbol defined, in which case we'll
- * assume it wants to use the shared library. However, if the client defines
- * the symbol OPENMM_USE_STATIC_LIBRARIES we'll suppress the dllimport so
- * that the client code can be linked with static libraries. Note that
- * the client symbol is not library dependent, while the library symbols
- * affect only the OpenMM library, meaning that other libraries can
- * be clients of this one. However, we are assuming all-static or all-shared.
- */
-
-#ifdef _MSC_VER
-    // We don't want to hear about how sprintf is "unsafe".
-    #pragma warning(disable:4996)
-    // Keep MS VC++ quiet about lack of dll export of private members.
-    #pragma warning(disable:4251)
-    #if defined(OPENMM_OPENCL_BUILDING_SHARED_LIBRARY)
-        #define OPENMM_EXPORT_OPENCL __declspec(dllexport)
-    #elif defined(OPENMM_OPENCL_BUILDING_STATIC_LIBRARY) || defined(OPENMM_OPENCL_USE_STATIC_LIBRARIES)
-        #define OPENMM_EXPORT_OPENCL
-    #else
-        #define OPENMM_EXPORT_OPENCL __declspec(dllimport)   // i.e., a client of a shared library
-    #endif
-#else
-    #define OPENMM_EXPORT_OPENCL // Linux, Mac
-#endif
-
-#endif // OPENMM_WINDOWSEXPORTOPENCL_H_
+#ifndef OPENMM_WINDOWSEXPORTCOMMON_H_
+#define OPENMM_WINDOWSEXPORTCOMMON_H_
+
+/*
+ * Shared libraries are messy in Visual Studio. We have to distinguish three
+ * cases:
+ *   (1) this header is being used to build the OpenMM shared library
+ *       (dllexport)
+ *   (2) this header is being used by a *client* of the OpenMM shared
+ *       library (dllimport)
+ *   (3) we are building the OpenMM static library, or the client is
+ *       being compiled with the expectation of linking with the
+ *       OpenMM static library (nothing special needed)
+ * In the CMake script for building this library, we define one of the symbols
+ *     OPENMM_COMMON_BUILDING_{SHARED|STATIC}_LIBRARY
+ * Client code normally has no special symbol defined, in which case we'll
+ * assume it wants to use the shared library. However, if the client defines
+ * the symbol OPENMM_USE_STATIC_LIBRARIES we'll suppress the dllimport so
+ * that the client code can be linked with static libraries. Note that
+ * the client symbol is not library dependent, while the library symbols
+ * affect only the OpenMM library, meaning that other libraries can
+ * be clients of this one. However, we are assuming all-static or all-shared.
+ */
+
+// Define OPENMM_EXPORT_COMMON: dllexport when building the shared library,
+// empty for static builds and non-MSVC compilers, dllimport otherwise.
+#ifdef _MSC_VER
+    // We don't want to hear about how sprintf is "unsafe".
+    #pragma warning(disable:4996)
+    // Keep MS VC++ quiet about lack of dll export of private members.
+    #pragma warning(disable:4251)
+    #if defined(OPENMM_COMMON_BUILDING_SHARED_LIBRARY)
+        #define OPENMM_EXPORT_COMMON __declspec(dllexport)
+    // NOTE(review): the client-side symbol tested here is OPENMM_COMMON_USE_STATIC_LIBRARIES,
+    // whereas the comment at the top of this header names OPENMM_USE_STATIC_LIBRARIES.
+    // Confirm which spelling clients are expected to define.
+    #elif defined(OPENMM_COMMON_BUILDING_STATIC_LIBRARY) || defined(OPENMM_COMMON_USE_STATIC_LIBRARIES)
+        #define OPENMM_EXPORT_COMMON
+    #else
+        #define OPENMM_EXPORT_COMMON __declspec(dllimport)   // i.e., a client of a shared library
+    #endif
+#else
+    #define OPENMM_EXPORT_COMMON // Linux, Mac
+#endif
+
+#endif // OPENMM_WINDOWSEXPORTCOMMON_H_
--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernelSources.cpp.in
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernelSources.cpp.in
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -24,7 +24,7 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

-#include "CudaDrudeKernelSources.h"
+#include "CommonKernelSources.h"

 using namespace OpenMM;
 using namespace std;

--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernelSources.h.in
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernelSources.h.in
-#ifndef OPENMM_CUDADRUDEKERNELSOURCES_H_
-#define OPENMM_CUDADRUDEKERNELSOURCES_H_
+#ifndef OPENMM_COMMONKERNELSOURCES_H_
+#define OPENMM_COMMONKERNELSOURCES_H_

 /* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2010 Stanford University and the Authors.           *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -27,21 +27,22 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

+#include "openmm/common/windowsExportCommon.h"
 #include <string>

 namespace OpenMM {

 /**
- * This class is a central holding place for the source code of CUDA kernels.
- * The CMake build script inserts declarations into it based on the .cu files in the
+ * This class is a central holding place for the source code of common kernels.
+ * The CMake build script inserts declarations into it based on the .cc files in the
 * kernels subfolder.
 */

-class CudaDrudeKernelSources {
+class OPENMM_EXPORT_COMMON CommonKernelSources {
 public:
-@CUDA_FILE_DECLARATIONS@
+@KERNEL_FILE_DECLARATIONS@
 };

 } // namespace OpenMM

-#endif /*OPENMM_CUDADRUDEKERNELSOURCES_H_*/
+#endif /*OPENMM_COMMONKERNELSOURCES_H_*/
--- a/platforms/common/src/CommonKernels.cpp
+++ b/platforms/common/src/CommonKernels.cpp
--- a/platforms/common/src/ComputeArray.cpp
+++ b/platforms/common/src/ComputeArray.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ComputeArray.h"
+#include "openmm/common/ComputeContext.h"
+
+using namespace OpenMM;
+
+/**
+ * Create a ComputeArray with no backing array.  initialize() must be called
+ * before any of the accessors can be used.
+ */
+ComputeArray::ComputeArray()
+        : impl(NULL) {
+}
+
+/**
+ * Release the backing array, if one was ever created.
+ */
+ComputeArray::~ComputeArray() {
+    // Deleting a null pointer is a no-op, so no explicit check is required.
+
+    delete impl;
+}
+
+/**
+ * Get the backing array that this object forwards all of its calls to.
+ *
+ * @throws OpenMMException if initialize() has not been called
+ */
+ArrayInterface& ComputeArray::getArray() {
+    if (impl != NULL)
+        return *impl;
+    throw OpenMMException("ComputeArray has not been initialized");
+}
+
+/**
+ * Create the backing array through the context and initialize it.  All of the
+ * arguments are forwarded unchanged to the backing array's initialize().
+ *
+ * @param context      the context that creates the backing array
+ * @param size         passed through to the backing array
+ * @param elementSize  passed through to the backing array
+ * @param name         passed through to the backing array
+ * @throws OpenMMException if this array has already been initialized
+ */
+void ComputeArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) {
+    if (isInitialized()) {
+        const std::string message = "The array "+getName()+" has already been initialized";
+        throw OpenMMException(message);
+    }
+    impl = context.createArray();
+    impl->initialize(context, size, elementSize, name);
+}
+
+/**
+ * Forward a resize request to the backing array.
+ *
+ * @param size  the new size, passed through to the backing array
+ * @throws OpenMMException if initialize() has not been called
+ */
+void ComputeArray::resize(int size) {
+    if (!isInitialized())
+        throw OpenMMException("ComputeArray has not been initialized");
+    impl->resize(size);
+}
+
+/**
+ * Report whether a backing array has been created for this object.
+ */
+bool ComputeArray::isInitialized() const {
+    const bool hasBackingArray = (impl != NULL);
+    return hasBackingArray;
+}
+
+/**
+ * Get the size reported by the backing array.
+ *
+ * @throws OpenMMException if initialize() has not been called
+ */
+int ComputeArray::getSize() const {
+    if (impl != NULL)
+        return impl->getSize();
+    throw OpenMMException("ComputeArray has not been initialized");
+}
+
+/**
+ * Get the element size reported by the backing array.
+ *
+ * @throws OpenMMException if initialize() has not been called
+ */
+int ComputeArray::getElementSize() const {
+    if (impl != NULL)
+        return impl->getElementSize();
+    throw OpenMMException("ComputeArray has not been initialized");
+}
+
+/**
+ * Get the name reported by the backing array.
+ *
+ * @throws OpenMMException if initialize() has not been called
+ */
+const std::string& ComputeArray::getName() const {
+    if (impl != NULL)
+        return impl->getName();
+    throw OpenMMException("ComputeArray has not been initialized");
+}
+
+/**
+ * Get the context reported by the backing array.
+ *
+ * @throws OpenMMException if initialize() has not been called
+ */
+ComputeContext& ComputeArray::getContext() {
+    if (impl != NULL)
+        return impl->getContext();
+    throw OpenMMException("ComputeArray has not been initialized");
+}
+
+/**
+ * Forward an upload request to the backing array.
+ *
+ * @param data      the source buffer, passed through to the backing array
+ * @param blocking  passed through to the backing array
+ * @throws OpenMMException if initialize() has not been called
+ */
+void ComputeArray::upload(const void* data, bool blocking) {
+    if (!isInitialized())
+        throw OpenMMException("ComputeArray has not been initialized");
+    impl->upload(data, blocking);
+}
+
+/**
+ * Forward a download request to the backing array.
+ *
+ * @param data      the destination buffer, passed through to the backing array
+ * @param blocking  passed through to the backing array
+ * @throws OpenMMException if initialize() has not been called
+ */
+void ComputeArray::download(void* data, bool blocking) const {
+    if (!isInitialized())
+        throw OpenMMException("ComputeArray has not been initialized");
+    impl->download(data, blocking);
+}
+
+/**
+ * Forward a copy request to the backing array.
+ *
+ * @param dest  the destination array, passed through to the backing array
+ * @throws OpenMMException if initialize() has not been called
+ */
+void ComputeArray::copyTo(ArrayInterface& dest) const {
+    if (!isInitialized())
+        throw OpenMMException("ComputeArray has not been initialized");
+    impl->copyTo(dest);
+}
\ No newline at end of file
--- a/platforms/common/src/ComputeContext.cpp
+++ b/platforms/common/src/ComputeContext.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2019 Stanford University and the Authors.           *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ComputeContext.h"
+#include "openmm/System.h"
+#include "openmm/VirtualSite.h"
+#include "openmm/internal/ContextImpl.h"
+#include "openmm/internal/ThreadPool.h"
+#include "hilbert.h"
+#include <algorithm>
+#include <cmath>
+#include <set>
+#include <sstream>
+#include <utility>
+
+using namespace OpenMM;
+using namespace std;
+
+/**
+ * Create a ComputeContext for a System and start its worker thread.
+ *
+ * stepsSinceReorder starts at 99999 so that the throttle in reorderAtoms()
+ * (which skips reordering while the counter is below 250) does not suppress
+ * the first reordering.
+ */
+ComputeContext::ComputeContext(const System& system) :
+        system(system),
+        time(0.0),
+        stepCount(0),
+        computeForceCount(0),
+        stepsSinceReorder(99999),
+        atomsWereReordered(false),
+        forcesValid(false),
+        thread(NULL) {
+    thread = new WorkThread();
+}
+
+/**
+ * Destroy the worker thread owned by this context.
+ */
+ComputeContext::~ComputeContext() {
+    // Deleting a null pointer is a no-op, so no explicit check is required.
+
+    delete thread;
+}
+
+/**
+ * Append a ComputeForceInfo to the list consulted when identifying and
+ * comparing molecules.
+ */
+void ComputeContext::addForce(ComputeForceInfo* force) {
+    forces.insert(forces.end(), force);
+}
+
+/**
+ * Replace whole-symbol occurrences of each key of a map with the corresponding value.
+ * A match is only replaced when it is not part of a longer identifier, i.e. the
+ * characters immediately before and after it are not letters, digits or '_'.
+ *
+ * @param input         the text to process
+ * @param replacements  maps each symbol to the text that should replace it.  Empty
+ *                      symbols are ignored, since they cannot name anything.
+ * @return a copy of the input with all replacements applied
+ */
+string ComputeContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const {
+    // Build the set of identifier characters exactly once.  Initialization of a
+    // function-local static is thread safe, unlike filling the set lazily.
+
+    static const set<char> symbolChars = [] {
+        set<char> chars;
+        chars.insert('_');
+        for (char c = 'a'; c <= 'z'; c++)
+            chars.insert(c);
+        for (char c = 'A'; c <= 'Z'; c++)
+            chars.insert(c);
+        for (char c = '0'; c <= '9'; c++)
+            chars.insert(c);
+        return chars;
+    }();
+    string result = input;
+    for (const auto& replacement : replacements) {
+        const string& symbol = replacement.first;
+        const string& value = replacement.second;
+        if (symbol.empty())
+            continue; // An empty symbol would match everywhere, and never advance if the value is empty too.
+
+        // Use the string's own size type so positions and npos are never narrowed to int.
+
+        const string::size_type size = symbol.size();
+        string::size_type index = 0;
+        while ((index = result.find(symbol, index)) != string::npos) {
+            if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) &&
+                    (index+size == result.size() || symbolChars.find(result[index+size]) == symbolChars.end())) {
+                // We have found a complete symbol, not part of a longer symbol.
+
+                result.replace(index, size, value);
+                index += value.size(); // Continue after the inserted text so it is not rescanned.
+            }
+            else
+                index++;
+        }
+    }
+    return result;
+}
+
+/**
+ * Format a floating point value in scientific notation.  Double precision contexts
+ * get 16 digits of precision; otherwise 8 digits are written and an "f" suffix
+ * is appended.
+ */
+string ComputeContext::doubleToString(double value) const {
+    int digits = 8;
+    if (getUseDoublePrecision())
+        digits = 16;
+    stringstream text;
+    text.precision(digits);
+    text << scientific << value;
+    if (getUseDoublePrecision())
+        return text.str();
+    text << "f";
+    return text.str();
+}
+
+/**
+ * Format an integer as text using a string stream.
+ */
+string ComputeContext::intToString(int value) const {
+    stringstream text;
+    text << value;
+    const string formatted = text.str();
+    return formatted;
+}
+
+/**
+ * A ComputeForceInfo that describes the virtual sites of a System.  Each virtual
+ * site, together with the particles it depends on, is reported as one particle
+ * group so that atom reordering doesn't break virtual sites.
+ */
+class ComputeContext::VirtualSiteInfo : public ComputeForceInfo {
+public:
+    /**
+     * Record the type, particles and weights of every virtual site in the System.
+     */
+    VirtualSiteInfo(const System& system) {
+        const int numParticles = system.getNumParticles();
+        for (int particle = 0; particle < numParticles; particle++) {
+            if (!system.isVirtualSite(particle))
+                continue;
+            const VirtualSite& vsite = system.getVirtualSite(particle);
+            siteTypes.push_back(&typeid(vsite));
+
+            // The group is the site itself followed by the particles it depends on.
+
+            vector<int> particles(1, particle);
+            for (int j = 0; j < vsite.getNumParticles(); j++)
+                particles.push_back(vsite.getParticle(j));
+            siteParticles.push_back(particles);
+
+            // Weights are recorded only for the site types handled below.  Any other
+            // type of site gets an empty weight list.
+
+            vector<double> weights;
+            if (const TwoParticleAverageSite* twoSite = dynamic_cast<const TwoParticleAverageSite*>(&vsite)) {
+                weights.push_back(twoSite->getWeight(0));
+                weights.push_back(twoSite->getWeight(1));
+            }
+            else if (const ThreeParticleAverageSite* threeSite = dynamic_cast<const ThreeParticleAverageSite*>(&vsite)) {
+                weights.push_back(threeSite->getWeight(0));
+                weights.push_back(threeSite->getWeight(1));
+                weights.push_back(threeSite->getWeight(2));
+            }
+            else if (const OutOfPlaneSite* planeSite = dynamic_cast<const OutOfPlaneSite*>(&vsite)) {
+                weights.push_back(planeSite->getWeight12());
+                weights.push_back(planeSite->getWeight13());
+                weights.push_back(planeSite->getWeightCross());
+            }
+            siteWeights.push_back(weights);
+        }
+    }
+    /**
+     * Get the number of groups, which is one per virtual site.
+     */
+    int getNumParticleGroups() {
+        return siteTypes.size();
+    }
+    /**
+     * Get the particles in a group: the virtual site followed by the particles it depends on.
+     */
+    void getParticlesInGroup(int index, std::vector<int>& particles) {
+        particles = siteParticles[index];
+    }
+    /**
+     * Two groups are identical if their sites have the same recorded type and exactly
+     * the same list of weights.
+     */
+    bool areGroupsIdentical(int group1, int group2) {
+        return (siteTypes[group1] == siteTypes[group2] && siteWeights[group1] == siteWeights[group2]);
+    }
+private:
+    vector<const type_info*> siteTypes;
+    vector<vector<int> > siteParticles;
+    vector<vector<double> > siteWeights;
+};
+
+/**
+ * Identify the molecules in the System and sort them into groups of identical
+ * molecules, storing the result in moleculeGroups.
+ *
+ * The first time this is called (while moleculeGroups is empty) it also registers a
+ * VirtualSiteInfo, finds the molecules from the constraints and force groups, and
+ * records for each molecule its atoms, constraints and force groups.  Every call
+ * then rebuilds moleculeGroups by comparing each molecule against the unique
+ * molecules found so far.
+ */
+void ComputeContext::findMoleculeGroups() {
+    // The first time this is called, we need to identify all the molecules in the system.
+
+    if (moleculeGroups.size() == 0) {
+        // Add a ForceInfo that makes sure reordering doesn't break virtual sites.
+
+        addForce(new VirtualSiteInfo(system));
+
+        // First make a list of every other atom to which each atom is connected by a constraint or force group.
+
+        vector<vector<int> > atomBonds(system.getNumParticles());
+        for (int i = 0; i < system.getNumConstraints(); i++) {
+            int particle1, particle2;
+            double distance;
+            system.getConstraintParameters(i, particle1, particle2, distance);
+            atomBonds[particle1].push_back(particle2);
+            atomBonds[particle2].push_back(particle1);
+        }
+        for (auto force : forces) {
+            for (int j = 0; j < force->getNumParticleGroups(); j++) {
+                vector<int> particles;
+                force->getParticlesInGroup(j, particles);
+                for (int k = 0; k < (int) particles.size(); k++)
+                    for (int m = 0; m < (int) particles.size(); m++)
+                        if (k != m)
+                            atomBonds[particles[k]].push_back(particles[m]);
+            }
+        }
+
+        // Now identify atoms by which molecule they belong to.
+
+        vector<vector<int> > atomIndices = ContextImpl::findMolecules(numAtoms, atomBonds);
+        int numMolecules = atomIndices.size();
+        vector<int> atomMolecule(numAtoms);
+        for (int i = 0; i < (int) atomIndices.size(); i++)
+            for (int j = 0; j < (int) atomIndices[i].size(); j++)
+                atomMolecule[atomIndices[i][j]] = i;
+
+        // Construct a description of each molecule.
+
+        molecules.resize(numMolecules);
+        for (int i = 0; i < numMolecules; i++) {
+            molecules[i].atoms = atomIndices[i];
+            molecules[i].groups.resize(forces.size());
+        }
+        for (int i = 0; i < system.getNumConstraints(); i++) {
+            int particle1, particle2;
+            double distance;
+            system.getConstraintParameters(i, particle1, particle2, distance);
+            molecules[atomMolecule[particle1]].constraints.push_back(i);
+        }
+        for (int i = 0; i < (int) forces.size(); i++)
+            for (int j = 0; j < forces[i]->getNumParticleGroups(); j++) {
+                vector<int> particles;
+                forces[i]->getParticlesInGroup(j, particles);
+                if (particles.size() > 0)
+                    molecules[atomMolecule[particles[0]]].groups[i].push_back(j);
+            }
+    }
+
+    // Sort them into groups of identical molecules.
+
+    vector<Molecule> uniqueMolecules;
+    vector<vector<int> > moleculeInstances;
+    vector<vector<int> > moleculeOffsets;
+    for (int molIndex = 0; molIndex < (int) molecules.size(); molIndex++) {
+        Molecule& mol = molecules[molIndex];
+
+        // See if it is identical to another molecule.
+
+        bool isNew = true;
+        for (int j = 0; j < (int) uniqueMolecules.size() && isNew; j++) {
+            Molecule& mol2 = uniqueMolecules[j];
+            bool identical = (mol.atoms.size() == mol2.atoms.size() && mol.constraints.size() == mol2.constraints.size());
+
+            // See if the atoms are identical.  Two molecules can only match if every
+            // atom index differs by the same constant offset.
+
+            int atomOffset = mol2.atoms[0]-mol.atoms[0];
+            for (int i = 0; i < (int) mol.atoms.size() && identical; i++) {
+                if (mol.atoms[i] != mol2.atoms[i]-atomOffset || system.getParticleMass(mol.atoms[i]) != system.getParticleMass(mol2.atoms[i]))
+                    identical = false;
+                for (int k = 0; k < (int) forces.size(); k++)
+                    if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i]))
+                        identical = false;
+            }
+
+            // See if the constraints are identical.
+
+            for (int i = 0; i < (int) mol.constraints.size() && identical; i++) {
+                int c1particle1, c1particle2, c2particle1, c2particle2;
+                double distance1, distance2;
+                system.getConstraintParameters(mol.constraints[i], c1particle1, c1particle2, distance1);
+                system.getConstraintParameters(mol2.constraints[i], c2particle1, c2particle2, distance2);
+                if (c1particle1 != c2particle1-atomOffset || c1particle2 != c2particle2-atomOffset || distance1 != distance2)
+                    identical = false;
+            }
+
+            // See if the force groups are identical.
+
+            for (int i = 0; i < (int) forces.size() && identical; i++) {
+                if (mol.groups[i].size() != mol2.groups[i].size())
+                    identical = false;
+                for (int k = 0; k < (int) mol.groups[i].size() && identical; k++) {
+                    if (!forces[i]->areGroupsIdentical(mol.groups[i][k], mol2.groups[i][k]))
+                        identical = false;
+                    vector<int> p1, p2;
+                    forces[i]->getParticlesInGroup(mol.groups[i][k], p1);
+                    forces[i]->getParticlesInGroup(mol2.groups[i][k], p2);
+
+                    // Groups with different numbers of particles can never match.  Checking
+                    // this first also keeps the comparison below from reading past the end of p2.
+
+                    if (p1.size() != p2.size())
+                        identical = false;
+                    for (int m = 0; m < (int) p1.size() && identical; m++)
+                        if (p1[m] != p2[m]-atomOffset)
+                            identical = false;
+                }
+            }
+            if (identical) {
+                moleculeInstances[j].push_back(molIndex);
+                moleculeOffsets[j].push_back(mol.atoms[0]);
+                isNew = false;
+            }
+        }
+        if (isNew) {
+            uniqueMolecules.push_back(mol);
+            moleculeInstances.push_back(vector<int>());
+            moleculeInstances[moleculeInstances.size()-1].push_back(molIndex);
+            moleculeOffsets.push_back(vector<int>());
+            moleculeOffsets[moleculeOffsets.size()-1].push_back(mol.atoms[0]);
+        }
+    }
+
+    // Record each group: its instances, the first atom of each instance, and the atom
+    // indices of the molecule relative to its first atom.
+
+    moleculeGroups.resize(moleculeInstances.size());
+    for (int i = 0; i < (int) moleculeInstances.size(); i++)
+    {
+        moleculeGroups[i].instances = moleculeInstances[i];
+        moleculeGroups[i].offsets = moleculeOffsets[i];
+        vector<int>& atoms = uniqueMolecules[i].atoms;
+        moleculeGroups[i].atoms.resize(atoms.size());
+        for (int j = 0; j < (int) atoms.size(); j++)
+            moleculeGroups[i].atoms[j] = atoms[j]-atoms[0];
+    }
+}
+
+/**
+ * Check every registered force in turn, stopping as soon as one of them reports
+ * that the molecule groups had to be rebuilt.
+ */
+void ComputeContext::invalidateMolecules() {
+    int forceIndex = 0;
+    while (forceIndex < forces.size()) {
+        const bool rebuilt = invalidateMolecules(forces[forceIndex]);
+        if (rebuilt)
+            return;
+        forceIndex++;
+    }
+}
+
+/**
+ * Check whether the molecules currently grouped together as identical are still
+ * identical according to one force.  If they are not, restore the atoms to their
+ * original order, rebuild the molecule groups and reorder the atoms again.
+ *
+ * @param force  the force whose particles and groups should be compared
+ * @return true if the molecule groups were found to be invalid and were rebuilt,
+ *         false if nothing needed to change (including when there are no atoms
+ *         or no cutoff is in use)
+ */
+bool ComputeContext::invalidateMolecules(ComputeForceInfo* force) {
+    if (numAtoms == 0 || !getNonbondedUtilities().getUseCutoff())
+        return false;
+    bool valid = true;
+
+    // Find the position of this force in the list.  It stays at -1 if the force was
+    // never added, in which case only the per-particle comparison is performed.
+
+    int forceIndex = -1;
+    for (int i = 0; i < forces.size(); i++)
+        if (forces[i] == force)
+            forceIndex = i;
+
+    // Compare every instance of each molecule group against the first instance.  The
+    // instances are divided between the threads of the pool.
+    // NOTE(review): valid is read and written by several threads without
+    // synchronization; confirm this is acceptable for an early-exit flag.
+
+    getThreadPool().execute([&] (ThreadPool& threads, int threadIndex) {
+        for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
+            MoleculeGroup& mol = moleculeGroups[group];
+            vector<int>& instances = mol.instances;
+            vector<int>& offsets = mol.offsets;
+            vector<int>& atoms = mol.atoms;
+            int numMolecules = instances.size();
+            Molecule& m1 = molecules[instances[0]];
+            int offset1 = offsets[0];
+            int numThreads = threads.getNumThreads();
+            int start = max(1, threadIndex*numMolecules/numThreads); // Instance 0 is the reference, so skip it.
+            int end = (threadIndex+1)*numMolecules/numThreads;
+            for (int j = start; j < end; j++) {
+                // See if the atoms are identical.
+
+                Molecule& m2 = molecules[instances[j]];
+                int offset2 = offsets[j];
+                for (int i = 0; i < (int) atoms.size() && valid; i++) {
+                    if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
+                        valid = false;
+                }
+
+                // See if the force groups are identical.
+
+                if (valid && forceIndex > -1) {
+                    for (int k = 0; k < (int) m1.groups[forceIndex].size() && valid; k++)
+                        if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
+                            valid = false;
+                }
+            }
+        }
+    });
+    getThreadPool().waitForThreads();
+    if (valid)
+        return false;
+
+    // The list of which molecules are identical is no longer valid.  We need to restore the
+    // atoms to their original order, rebuild the list of identical molecules, and sort them
+    // again.
+
+    vector<mm_int4> newCellOffsets(numAtoms);
+    if (getUseDoublePrecision()) {
+        vector<mm_double4> oldPosq(paddedNumAtoms);
+        vector<mm_double4> newPosq(paddedNumAtoms, mm_double4(0,0,0,0));
+        vector<mm_double4> oldVelm(paddedNumAtoms);
+        vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0));
+        getPosq().download(oldPosq);
+        getVelm().download(oldVelm);
+        for (int i = 0; i < numAtoms; i++) {
+            int index = atomIndex[i];
+            newPosq[index] = oldPosq[i];
+            newVelm[index] = oldVelm[i];
+            newCellOffsets[index] = posCellOffsets[i];
+        }
+        getPosq().upload(newPosq);
+        getVelm().upload(newVelm);
+    }
+    else if (getUseMixedPrecision()) {
+        vector<mm_float4> oldPosq(paddedNumAtoms);
+        vector<mm_float4> newPosq(paddedNumAtoms, mm_float4(0,0,0,0));
+        vector<mm_float4> oldPosqCorrection(paddedNumAtoms);
+        vector<mm_float4> newPosqCorrection(paddedNumAtoms, mm_float4(0,0,0,0));
+        vector<mm_double4> oldVelm(paddedNumAtoms);
+        vector<mm_double4> newVelm(paddedNumAtoms, mm_double4(0,0,0,0));
+        getPosq().download(oldPosq);
+        getVelm().download(oldVelm);
+
+        // The position corrections must be downloaded too.  Otherwise the values that
+        // get permuted and uploaded below would not be the ones stored on the device.
+
+        getPosqCorrection().download(oldPosqCorrection);
+        for (int i = 0; i < numAtoms; i++) {
+            int index = atomIndex[i];
+            newPosq[index] = oldPosq[i];
+            newPosqCorrection[index] = oldPosqCorrection[i];
+            newVelm[index] = oldVelm[i];
+            newCellOffsets[index] = posCellOffsets[i];
+        }
+        getPosq().upload(newPosq);
+        getPosqCorrection().upload(newPosqCorrection);
+        getVelm().upload(newVelm);
+    }
+    else {
+        vector<mm_float4> oldPosq(paddedNumAtoms);
+        vector<mm_float4> newPosq(paddedNumAtoms, mm_float4(0,0,0,0));
+        vector<mm_float4> oldVelm(paddedNumAtoms);
+        vector<mm_float4> newVelm(paddedNumAtoms, mm_float4(0,0,0,0));
+        getPosq().download(oldPosq);
+        getVelm().download(oldVelm);
+        for (int i = 0; i < numAtoms; i++) {
+            int index = atomIndex[i];
+            newPosq[index] = oldPosq[i];
+            newVelm[index] = oldVelm[i];
+            newCellOffsets[index] = posCellOffsets[i];
+        }
+        getPosq().upload(newPosq);
+        getVelm().upload(newVelm);
+    }
+
+    // The atoms are now back in their original order, so the index map is the identity.
+
+    for (int i = 0; i < numAtoms; i++) {
+        atomIndex[i] = i;
+        posCellOffsets[i] = newCellOffsets[i];
+    }
+    getAtomIndexArray().upload(atomIndex);
+    findMoleculeGroups();
+    for (auto listener : reorderListeners)
+        listener->execute();
+    reorderAtoms();
+    return true;
+}
+
+/**
+ * Reorder the atoms, unless there are no atoms, no cutoff is in use, or fewer
+ * than 250 calls have passed since the last reordering.  atomsWereReordered
+ * records whether a reordering was performed by this call.
+ */
+void ComputeContext::reorderAtoms() {
+    atomsWereReordered = false;
+    const bool shouldReorder = (numAtoms != 0 && getNonbondedUtilities().getUseCutoff() && stepsSinceReorder >= 250);
+    if (!shouldReorder) {
+        stepsSinceReorder++;
+        return;
+    }
+    atomsWereReordered = true;
+    stepsSinceReorder = 0;
+
+    // Select the element types that match the precision mode of this context.
+
+    if (getUseDoublePrecision()) {
+        reorderAtomsImpl<double, mm_double4, double, mm_double4>();
+        return;
+    }
+    if (getUseMixedPrecision()) {
+        reorderAtomsImpl<float, mm_float4, double, mm_double4>();
+        return;
+    }
+    reorderAtomsImpl<float, mm_float4, float, mm_float4>();
+}
+
+/**
+ * Reorder the atoms so that molecules which are close together in space are also
+ * close together in the arrays.  Within each group of identical molecules, the
+ * molecules are assigned to spatial bins by their centers, sorted by bin, and
+ * their positions, velocities, cell offsets and original indices are permuted
+ * accordingly.  The reorder listeners are executed at the end.
+ *
+ * Real4 is the element type used for positions (and position corrections) and
+ * Mixed4 is the element type used for velocities.
+ *
+ * @throws OpenMMException if the x coordinate of a molecule center is NaN
+ */
+template <class Real, class Real4, class Mixed, class Mixed4>
+void ComputeContext::reorderAtomsImpl() {
+
+    // Find the range of positions and the number of bins along each axis.
+
+    vector<Real4> oldPosq(paddedNumAtoms);
+    vector<Real4> oldPosqCorrection(paddedNumAtoms);
+    vector<Mixed4> oldVelm(paddedNumAtoms);
+    getPosq().download(oldPosq);
+    getVelm().download(oldVelm);
+    if (getUseMixedPrecision())
+        getPosqCorrection().download(oldPosqCorrection);
+    Real minx = oldPosq[0].x, maxx = oldPosq[0].x;
+    Real miny = oldPosq[0].y, maxy = oldPosq[0].y;
+    Real minz = oldPosq[0].z, maxz = oldPosq[0].z;
+    Vec3 periodicBoxX, periodicBoxY, periodicBoxZ;
+    getPeriodicBoxVectors(periodicBoxX, periodicBoxY, periodicBoxZ);
+    Vec3 invPeriodicBoxSize(1.0/periodicBoxX[0], 1.0/periodicBoxY[1], 1.0/periodicBoxZ[2]);
+    if (getNonbondedUtilities().getUsePeriodic()) {
+        // With periodic boundaries the range is taken from the diagonal of the box vectors.
+        minx = miny = minz = 0.0;
+        maxx = periodicBoxX[0];
+        maxy = periodicBoxY[1];
+        maxz = periodicBoxZ[2];
+    }
+    else {
+        // Otherwise use the bounding box of the atom positions.
+        for (int i = 1; i < numAtoms; i++) {
+            const Real4& pos = oldPosq[i];
+            minx = min(minx, pos.x);
+            maxx = max(maxx, pos.x);
+            miny = min(miny, pos.y);
+            maxy = max(maxy, pos.y);
+            minz = min(minz, pos.z);
+            maxz = max(maxz, pos.z);
+        }
+    }
+
+    // Loop over each group of identical molecules and reorder them.
+
+    
+    vector<int> originalIndex(numAtoms);
+    vector<Real4> newPosq(paddedNumAtoms, Real4(0,0,0,0));
+    vector<Real4> newPosqCorrection(paddedNumAtoms, Real4(0,0,0,0));
+    vector<Mixed4> newVelm(paddedNumAtoms, Mixed4(0,0,0,0));
+    vector<mm_int4> newCellOffsets(numAtoms);
+    for (auto& mol : moleculeGroups) {
+        // Find the center of each molecule.
+
+        int numMolecules = mol.offsets.size();
+        vector<int>& atoms = mol.atoms;
+        vector<Real4> molPos(numMolecules);
+        Real invNumAtoms = (Real) (1.0/atoms.size());
+        for (int i = 0; i < numMolecules; i++) {
+            molPos[i].x = 0.0f;
+            molPos[i].y = 0.0f;
+            molPos[i].z = 0.0f;
+            for (int j = 0; j < (int)atoms.size(); j++) {
+                int atom = atoms[j]+mol.offsets[i];
+                const Real4& pos = oldPosq[atom];
+                molPos[i].x += pos.x;
+                molPos[i].y += pos.y;
+                molPos[i].z += pos.z;
+            }
+            molPos[i].x *= invNumAtoms;
+            molPos[i].y *= invNumAtoms;
+            molPos[i].z *= invNumAtoms;
+            // A value compares unequal to itself only when it is NaN.  Only x is tested here.
+            if (molPos[i].x != molPos[i].x)
+                throw OpenMMException("Particle coordinate is nan");
+        }
+        if (getNonbondedUtilities().getUsePeriodic()) {
+            // Move each molecule position into the same box.
+
+            for (int i = 0; i < numMolecules; i++) {
+                // Wrap z first, then y, then x, subtracting whole box vectors each time.
+                Real4 center = molPos[i];
+                int zcell = (int) floor(center.z*invPeriodicBoxSize[2]);
+                center.x -= zcell*periodicBoxZ[0];
+                center.y -= zcell*periodicBoxZ[1];
+                center.z -= zcell*periodicBoxZ[2];
+                int ycell = (int) floor(center.y*invPeriodicBoxSize[1]);
+                center.x -= ycell*periodicBoxY[0];
+                center.y -= ycell*periodicBoxY[1];
+                int xcell = (int) floor(center.x*invPeriodicBoxSize[0]);
+                center.x -= xcell*periodicBoxX[0];
+                if (xcell != 0 || ycell != 0 || zcell != 0) {
+                    // The center moved, so shift every atom of the molecule by the same
+                    // displacement and record the change in its cell offsets.
+                    Real dx = molPos[i].x-center.x;
+                    Real dy = molPos[i].y-center.y;
+                    Real dz = molPos[i].z-center.z;
+                    molPos[i] = center;
+                    for (int j = 0; j < (int) atoms.size(); j++) {
+                        int atom = atoms[j]+mol.offsets[i];
+                        Real4 p = oldPosq[atom];
+                        p.x -= dx;
+                        p.y -= dy;
+                        p.z -= dz;
+                        oldPosq[atom] = p;
+                        posCellOffsets[atom].x -= xcell;
+                        posCellOffsets[atom].y -= ycell;
+                        posCellOffsets[atom].z -= zcell;
+                    }
+                }
+            }
+        }
+
+        // Select a bin for each molecule, then sort them by bin.
+
+        bool useHilbert = (numMolecules > 5000 || atoms.size() > 8); // For small systems, a simple zigzag curve works better than a Hilbert curve.
+        Real binWidth;
+        if (useHilbert)
+            // Divide the largest extent into 255 bins; hilbert_c2i() below is called with 8 bits per coordinate.
+            binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
+        else
+            binWidth = (Real) (0.2*getNonbondedUtilities().getMaxCutoffDistance());
+        Real invBinWidth = (Real) (1.0/binWidth);
+        int xbins = 1 + (int) ((maxx-minx)*invBinWidth);
+        int ybins = 1 + (int) ((maxy-miny)*invBinWidth);
+        // Each entry is (bin, molecule index), so sorting orders the molecules by bin.
+        vector<pair<int, int> > molBins(numMolecules);
+        bitmask_t coords[3];
+        for (int i = 0; i < numMolecules; i++) {
+            int x = (int) ((molPos[i].x-minx)*invBinWidth);
+            int y = (int) ((molPos[i].y-miny)*invBinWidth);
+            int z = (int) ((molPos[i].z-minz)*invBinWidth);
+            int bin;
+            if (useHilbert) {
+                coords[0] = x;
+                coords[1] = y;
+                coords[2] = z;
+                bin = (int) hilbert_c2i(3, 8, coords);
+            }
+            else {
+                // Zigzag ordering: the direction of travel in y is reversed on odd z
+                // layers, and the direction in x is reversed on odd y rows.
+                int yodd = y&1;
+                int zodd = z&1;
+                bin = z*xbins*ybins;
+                bin += (zodd ? ybins-y : y)*xbins;
+                bin += (yodd ? xbins-x : x);
+            }
+            molBins[i] = pair<int, int>(bin, i);
+        }
+        sort(molBins.begin(), molBins.end());
+
+        // Reorder the atoms.
+
+        for (int i = 0; i < numMolecules; i++) {
+            for (int atom : atoms) {
+                // The molecule that sorted into position i is copied into the slot of the
+                // group's i'th instance.
+                int oldIndex = mol.offsets[molBins[i].second]+atom;
+                int newIndex = mol.offsets[i]+atom;
+                originalIndex[newIndex] = atomIndex[oldIndex];
+                newPosq[newIndex] = oldPosq[oldIndex];
+                if (getUseMixedPrecision())
+                    newPosqCorrection[newIndex] = oldPosqCorrection[oldIndex];
+                newVelm[newIndex] = oldVelm[oldIndex];
+                newCellOffsets[newIndex] = posCellOffsets[oldIndex];
+            }
+        }
+    }
+
+    // Update the arrays.
+
+    for (int i = 0; i < numAtoms; i++) {
+        atomIndex[i] = originalIndex[i];
+        posCellOffsets[i] = newCellOffsets[i];
+    }
+    getPosq().upload(newPosq);
+    if (getUseMixedPrecision())
+        getPosqCorrection().upload(newPosqCorrection);
+    getVelm().upload(newVelm);
+    getAtomIndexArray().upload(atomIndex);
+    // Let everything that depends on the atom order react to the new ordering.
+    for (auto listener : reorderListeners)
+        listener->execute();
+}
+
+/**
+ * Register a listener.  Every registered listener has execute() called on it
+ * after the atoms have been reordered.
+ */
+void ComputeContext::addReorderListener(ReorderListener* listener) {
+    reorderListeners.insert(reorderListeners.end(), listener);
+}
+
+/**
+ * Append a ForcePreComputation to the list of registered pre-computations.
+ */
+void ComputeContext::addPreComputation(ForcePreComputation* computation) {
+    preComputations.insert(preComputations.end(), computation);
+}
+
+/**
+ * Append a ForcePostComputation to the list of registered post-computations.
+ */
+void ComputeContext::addPostComputation(ForcePostComputation* computation) {
+    postComputations.insert(postComputations.end(), computation);
+}
+
+/**
+ * The state shared between a WorkThread and the thread it runs.  Every member is
+ * a reference to the corresponding field of the WorkThread that created it.
+ */
+struct ComputeContext::WorkThread::ThreadData {
+    std::queue<ComputeContext::WorkTask*>& tasks;
+    bool& waiting;
+    bool& finished;
+    pthread_mutex_t& queueLock;
+    pthread_cond_t& waitForTaskCondition;
+    pthread_cond_t& queueEmptyCondition;
+
+    ThreadData(std::queue<ComputeContext::WorkTask*>& tasks, bool& waiting,  bool& finished,
+            pthread_mutex_t& queueLock, pthread_cond_t& waitForTaskCondition, pthread_cond_t& queueEmptyCondition) :
+        tasks(tasks),
+        waiting(waiting),
+        finished(finished),
+        queueLock(queueLock),
+        waitForTaskCondition(waitForTaskCondition),
+        queueEmptyCondition(queueEmptyCondition) {
+    }
+};
+
+/**
+ * The body of the worker thread started by the WorkThread constructor.  It
+ * repeatedly takes the next task from the queue, executes it and deletes it,
+ * until the thread has been marked as finished and the queue is empty.
+ *
+ * @param args  a pointer to the ThreadData allocated by the WorkThread constructor.
+ *              This function deletes it before returning.
+ */
+static void* threadBody(void* args) {
+    ComputeContext::WorkThread::ThreadData& data = *reinterpret_cast<ComputeContext::WorkThread::ThreadData*>(args);
+    // Tasks still in the queue when finished is set are executed before exiting.
+    while (!data.finished || data.tasks.size() > 0) {
+        pthread_mutex_lock(&data.queueLock);
+        while (data.tasks.empty() && !data.finished) {
+            // Nothing to do: report that the thread is idle (this wakes flush()) and
+            // sleep until a task is added or the thread is told to finish.
+            data.waiting = true;
+            pthread_cond_signal(&data.queueEmptyCondition);
+            pthread_cond_wait(&data.waitForTaskCondition, &data.queueLock);
+        }
+        ComputeContext::WorkTask* task = NULL;
+        if (!data.tasks.empty()) {
+            data.waiting = false;
+            task = data.tasks.front();
+            data.tasks.pop();
+        }
+        pthread_mutex_unlock(&data.queueLock);
+        // The task is executed after releasing the lock, so new tasks can be added while it runs.
+        if (task != NULL) {
+            task->execute();
+            delete task;
+        }
+    }
+    // NOTE(review): this final update and signal happen without holding queueLock.
+    data.waiting = true;
+    pthread_cond_signal(&data.queueEmptyCondition);
+    delete &data;
+    return 0;
+}
+
+/**
+ * Create the synchronization objects and start the worker thread running threadBody().
+ */
+ComputeContext::WorkThread::WorkThread() : waiting(true), finished(false) {
+    pthread_mutex_init(&queueLock, NULL);
+    pthread_cond_init(&waitForTaskCondition, NULL);
+    pthread_cond_init(&queueEmptyCondition, NULL);
+    // The ThreadData holds references to this object's fields.  It is deleted by threadBody().
+    ThreadData* data = new ThreadData(tasks, waiting, finished, queueLock, waitForTaskCondition, queueEmptyCondition);
+    pthread_create(&thread, NULL, threadBody, data);
+}
+
+/**
+ * Tell the worker thread to finish, wait for it to exit, then destroy the
+ * synchronization objects.
+ */
+ComputeContext::WorkThread::~WorkThread() {
+    pthread_mutex_lock(&queueLock);
+    finished = true;
+    // Wake the worker if it is sleeping so that it notices the finished flag.
+    pthread_cond_broadcast(&waitForTaskCondition);
+    pthread_mutex_unlock(&queueLock);
+    // The mutex and conditions must not be destroyed until the worker has stopped using them.
+    pthread_join(thread, NULL);
+    pthread_mutex_destroy(&queueLock);
+    pthread_cond_destroy(&waitForTaskCondition);
+    pthread_cond_destroy(&queueEmptyCondition);
+}
+
+/**
+ * Add a task to the queue and wake the worker thread.  The worker deletes the
+ * task after executing it.
+ */
+void ComputeContext::WorkThread::addTask(ComputeContext::WorkTask* task) {
+    pthread_mutex_lock(&queueLock);
+    tasks.push(task);
+    // Clear the flag immediately so that flush() blocks until this task has been processed.
+    waiting = false;
+    pthread_cond_signal(&waitForTaskCondition);
+    pthread_mutex_unlock(&queueLock);
+}
+
+/**
+ * Get the current value of the waiting flag, which the worker sets when it finds
+ * the queue empty and which is cleared when a task is added or taken.  The flag
+ * is read without taking queueLock.
+ */
+bool ComputeContext::WorkThread::isWaiting() {
+    return waiting;
+}
+
+/**
+ * Get the current value of the finished flag, which is set by the destructor.
+ * The flag is read without taking queueLock.
+ */
+bool ComputeContext::WorkThread::isFinished() {
+    return finished;
+}
+
+/**
+ * Block the calling thread until the worker reports that it is waiting, i.e. it
+ * has found the task queue empty.
+ */
+void ComputeContext::WorkThread::flush() {
+    pthread_mutex_lock(&queueLock);
+    // Re-check the flag in a loop after every wake-up before proceeding.
+    while (!waiting)
+       pthread_cond_wait(&queueEmptyCondition, &queueLock);
+    pthread_mutex_unlock(&queueLock);
+}
--- a/platforms/opencl/src/OpenCLForceInfo.cpp
+++ b/platforms/opencl/src/OpenCLForceInfo.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Portions copyright (c) 2012-2019 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -24,23 +24,23 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

-#include "OpenCLForceInfo.h"
+#include "openmm/common/ComputeForceInfo.h"

 using namespace OpenMM;
 using namespace std;

-bool OpenCLForceInfo::areParticlesIdentical(int particle1, int particle2) {
+bool ComputeForceInfo::areParticlesIdentical(int particle1, int particle2) {
    return true;
 }

-int OpenCLForceInfo::getNumParticleGroups() {
+int ComputeForceInfo::getNumParticleGroups() {
    return 0;
 }

-void OpenCLForceInfo::getParticlesInGroup(int index, vector<int>& particles) {
+void ComputeForceInfo::getParticlesInGroup(int index, vector<int>& particles) {
    return;
 }

-bool OpenCLForceInfo::areGroupsIdentical(int group1, int group2) {
+bool ComputeForceInfo::areGroupsIdentical(int group1, int group2) {
    return true;
 }
--- a/platforms/common/src/ComputeParameterSet.cpp
+++ b/platforms/common/src/ComputeParameterSet.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/ComputeParameterSet.h"
+#include "openmm/OpenMMException.h"
+#include <cmath>
+#include <sstream>
+
+using namespace OpenMM;
+using namespace std;
+
+/**
+ * Create the device arrays that hold numParameters values for each of numObjects objects.
+ *
+ * Unless arrayPerParameter is set, parameters are packed four per array while more than two
+ * remain, then two per array if more than one remains; whatever is left gets one array per
+ * parameter.  Arrays are named "param1", "param2", ... in creation order, and one
+ * ComputeParameterInfo is recorded for each.
+ */
+ComputeParameterSet::ComputeParameterSet(ComputeContext& context, int numParameters, int numObjects, const string& name, bool arrayPerParameter, bool useDoublePrecision) :
+            context(context), numParameters(numParameters), numObjects(numObjects), name(name) {
+    elementSize = (useDoublePrecision ? sizeof(double) : sizeof(float));
+    string elementType = (useDoublePrecision ? "double" : "float");
+    int bufferCount = 0;
+
+    // Creates and initializes the next array, holding `width` components per object.
+    auto addArray = [&](int width) {
+        stringstream label;
+        label << "param" << (++bufferCount);
+        arrays.push_back(context.createArray());
+        arrays.back()->initialize(context, numObjects, elementSize*width, label.str());
+    };
+
+    int remaining = numParameters;
+    if (!arrayPerParameter) {
+        // Three leftover parameters still occupy a four-component array.
+        for (; remaining > 2; remaining -= 4)
+            addArray(4);
+        if (remaining > 1) {
+            addArray(2);
+            remaining -= 2;
+        }
+    }
+    for (; remaining > 0; remaining--)
+        addArray(1);
+    for (int i = 0; i < (int) arrays.size(); i++) {
+        ArrayInterface* array = arrays[i];
+        parameters.push_back(ComputeParameterInfo(*array, array->getName(), elementType, array->getElementSize()/elementSize));
+    }
+}
+
+/**
+ * Delete every array that was created by the constructor.
+ */
+ComputeParameterSet::~ComputeParameterSet() {
+    for (int i = 0; i < (int) arrays.size(); i++)
+        delete arrays[i];
+}
+
+/**
+ * Download all parameter values.
+ *
+ * @param values   on exit, values[i][j] holds parameter j of object i; it is resized to
+ *                 numObjects rows of numParameters entries
+ * @throws OpenMMException if sizeof(T) differs from the element size of this set, or if an
+ *                 array has an element size that is not 1, 2, or 4 components
+ */
+template <class T>
+void ComputeParameterSet::getParameterValues(vector<vector<T> >& values) {
+    if (sizeof(T) != elementSize)
+        throw OpenMMException("Called getParameterValues() with vector of wrong type");
+    values.resize(numObjects);
+    for (vector<T>& row : values)
+        row.resize(numParameters);
+    int firstParam = 0; // index of the parameter stored in component 0 of the current array
+    for (int i = 0; i < (int) arrays.size(); i++) {
+        ArrayInterface* array = arrays[i];
+        const auto stride = array->getElementSize();
+        int width;
+        if (stride == 4*elementSize)
+            width = 4;
+        else if (stride == 2*elementSize)
+            width = 2;
+        else if (stride == elementSize)
+            width = 1;
+        else
+            throw OpenMMException("Internal error: Unknown buffer type in ComputeParameterSet");
+        vector<T> packed(width*numObjects);
+        array->download(packed.data());
+        for (int j = 0; j < numObjects; j++) {
+            // Component 0 always maps to a real parameter; later components may be padding.
+            for (int k = 0; k < width; k++)
+                if (k == 0 || firstParam+k < numParameters)
+                    values[j][firstParam+k] = packed[width*j+k];
+        }
+        firstParam += width;
+    }
+}
+
+/**
+ * Upload new values for all parameters.
+ *
+ * @param values   values[i][j] is the value of parameter j for object i.  When this set
+ *                 contains any arrays, there must be at least numObjects rows, each with at
+ *                 least numParameters entries.
+ * @throws OpenMMException if sizeof(T) differs from the element size of this set, if values
+ *                 is too small, or if an array has an element size that is not 1, 2, or 4
+ *                 components
+ */
+template <class T>
+void ComputeParameterSet::setParameterValues(const vector<vector<T> >& values) {
+    if (sizeof(T) != elementSize)
+        throw OpenMMException("Called setParameterValues() with vector of wrong type");
+    if (!arrays.empty()) {
+        // Every row is indexed up to numParameters-1 below, so reject short input up front
+        // instead of reading past the end of a vector.
+        if ((int) values.size() < numObjects)
+            throw OpenMMException("Called setParameterValues() with too few objects");
+        for (int j = 0; j < numObjects; j++)
+            if ((int) values[j].size() < numParameters)
+                throw OpenMMException("Called setParameterValues() with too few parameters for an object");
+    }
+    int firstParam = 0; // index of the parameter stored in component 0 of the current array
+    for (int i = 0; i < (int) arrays.size(); i++) {
+        ArrayInterface* array = arrays[i];
+        const auto stride = array->getElementSize();
+        int width;
+        if (stride == 4*elementSize)
+            width = 4;
+        else if (stride == 2*elementSize)
+            width = 2;
+        else if (stride == elementSize)
+            width = 1;
+        else
+            throw OpenMMException("Internal error: Unknown buffer type in ComputeParameterSet");
+        // Padding components that do not correspond to a parameter stay zero-initialized.
+        vector<T> packed(width*numObjects);
+        for (int j = 0; j < numObjects; j++) {
+            for (int k = 0; k < width; k++)
+                if (k == 0 || firstParam+k < numParameters)
+                    packed[width*j+k] = values[j][firstParam+k];
+        }
+        array->upload(packed.data());
+        firstParam += width;
+    }
+}
+
+/**
+ * Build the suffix used to refer to a parameter: the 1-based number of the array that holds
+ * it, followed by extraSuffix and, when that array has more than one component, the
+ * component selector (".x", ".y", ".z", or ".w").
+ *
+ * @param index        the index of the parameter
+ * @param extraSuffix  text inserted between the array number and the component selector
+ * @throws OpenMMException if index lies beyond the last array
+ */
+string ComputeParameterSet::getParameterSuffix(int index, const std::string& extraSuffix) const {
+    const string suffixes[] = {".x", ".y", ".z", ".w"};
+    int component = index; // becomes the position inside the array that holds the parameter
+    int buffer = -1;
+    for (int i = 0; i < (int) parameters.size(); i++) {
+        if (component*elementSize < parameters[i].getSize()) {
+            buffer = i;
+            break;
+        }
+        component -= parameters[i].getSize()/elementSize;
+    }
+    if (buffer == -1)
+        throw OpenMMException("Internal error: Illegal argument to ComputeParameterSet::getParameterSuffix() ("+name+")");
+    stringstream suffix;
+    suffix << (buffer+1);
+    suffix << extraSuffix;
+    const bool multiComponent = (parameters[buffer].getSize() != elementSize);
+    if (multiComponent)
+        suffix << suffixes[component];
+    return suffix.str();
+}
+
+/**
+ * Explicitly instantiate getParameterValues() and setParameterValues() for float and double.
+ * Both templates are defined in this source file.
+ */
+namespace OpenMM {
+// Single precision.
+template void ComputeParameterSet::getParameterValues<float>(vector<vector<float> >& values);
+template void ComputeParameterSet::setParameterValues<float>(const vector<vector<float> >& values);
+// Double precision.
+template void ComputeParameterSet::getParameterValues<double>(vector<vector<double> >& values);
+template void ComputeParameterSet::setParameterValues<double>(const vector<vector<double> >& values);
+}
\ No newline at end of file
--- a/platforms/cuda/src/CudaExpressionUtilities.cpp
+++ b/platforms/cuda/src/CudaExpressionUtilities.cpp
@@ -24,7 +24,7 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */

-#include "CudaExpressionUtilities.h"
+#include "openmm/common/ExpressionUtilities.h"
 #include "openmm/OpenMMException.h"
 #include "openmm/internal/SplineFitter.h"
 #include "lepton/Operation.h"
@@ -33,10 +33,10 @@ using namespace OpenMM;
 using namespace Lepton;
 using namespace std;

-CudaExpressionUtilities::CudaExpressionUtilities(CudaContext& context) : context(context), fp1(1), fp2(2), fp3(3), periodicDistance(6) {
+ExpressionUtilities::ExpressionUtilities(ComputeContext& context) : context(context), fp1(1), fp2(2), fp3(3), periodicDistance(6) {
 }

-string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
+string ExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
        const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const string& tempType) {
    vector<pair<ExpressionTreeNode, string> > variableNodes;
    for (map<string, string>::const_iterator iter = variables.begin(); iter != variables.end(); ++iter)
@@ -44,7 +44,7 @@ string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpres
    return createExpressions(expressions, variableNodes, functions, functionNames, prefix, tempType);
 }

-string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const vector<pair<ExpressionTreeNode, string> >& variables,
+string ExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const vector<pair<ExpressionTreeNode, string> >& variables,
        const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const string& tempType) {
    stringstream out;
    vector<ParsedExpression> allExpressions;
@@ -59,7 +59,7 @@ string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpres
    return out.str();
 }

-void CudaExpressionUtilities::processExpression(stringstream& out, const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, string> >& temps,
+void ExpressionUtilities::processExpression(stringstream& out, const ExpressionTreeNode& node, vector<pair<ExpressionTreeNode, string> >& temps,
        const vector<const TabulatedFunction*>& functions, const vector<pair<string, string> >& functionNames, const string& prefix, const vector<vector<double> >& functionParams,
        const vector<ParsedExpression>& allExpressions, const string& tempType) {
    for (int i = 0; i < (int) temps.size(); i++)
@@ -662,7 +662,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
        temps.push_back(make_pair(node, name));
 }

-string CudaExpressionUtilities::getTempName(const ExpressionTreeNode& node, const vector<pair<ExpressionTreeNode, string> >& temps) {
+string ExpressionUtilities::getTempName(const ExpressionTreeNode& node, const vector<pair<ExpressionTreeNode, string> >& temps) {
    for (int i = 0; i < (int) temps.size(); i++)
        if (temps[i].first == node)
            return temps[i].second;
@@ -671,7 +671,7 @@ string CudaExpressionUtilities::getTempName(const ExpressionTreeNode& node, cons
    throw OpenMMException(out.str());
 }

-void CudaExpressionUtilities::findRelatedCustomFunctions(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode,
+void ExpressionUtilities::findRelatedCustomFunctions(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode,
            vector<const Lepton::ExpressionTreeNode*>& nodes) {
    if (searchNode.getOperation().getId() == Operation::CUSTOM && node.getOperation().getName() == searchNode.getOperation().getName()) {
        // Make sure the arguments are identical.
@@ -695,7 +695,7 @@ void CudaExpressionUtilities::findRelatedCustomFunctions(const ExpressionTreeNod
            findRelatedCustomFunctions(node, searchNode.getChildren()[i], nodes);
 }

-void CudaExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode, map<int, const ExpressionTreeNode*>& powers) {
+void ExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node, const ExpressionTreeNode& searchNode, map<int, const ExpressionTreeNode*>& powers) {
    if (searchNode.getOperation().getId() == Operation::POWER_CONSTANT && node.getChildren()[0] == searchNode.getChildren()[0]) {
        double realPower = dynamic_cast<const Operation::PowerConstant*>(&searchNode.getOperation())->getValue();
        int power = (int) realPower;
@@ -712,7 +712,7 @@ void CudaExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node,
            findRelatedPowers(node, searchNode.getChildren()[i], powers);
 }

-vector<float> CudaExpressionUtilities::computeFunctionCoefficients(const TabulatedFunction& function, int& width) {
+vector<float> ExpressionUtilities::computeFunctionCoefficients(const TabulatedFunction& function, int& width) {
    if (dynamic_cast<const Continuous1DFunction*>(&function) != NULL) {
        // Compute the spline coefficients.

@@ -827,7 +827,7 @@ vector<float> CudaExpressionUtilities::computeFunctionCoefficients(const Tabulat
    throw OpenMMException("computeFunctionCoefficients: Unknown function type");
 }

-vector<vector<double> > CudaExpressionUtilities::computeFunctionParameters(const vector<const TabulatedFunction*>& functions) {
+vector<vector<double> > ExpressionUtilities::computeFunctionParameters(const vector<const TabulatedFunction*>& functions) {
    vector<vector<double> > params(functions.size());
    for (int i = 0; i < (int) functions.size(); i++) {
        if (dynamic_cast<const Continuous1DFunction*>(functions[i]) != NULL) {
@@ -903,7 +903,7 @@ vector<vector<double> > CudaExpressionUtilities::computeFunctionParameters(const
    return params;
 }

-Lepton::CustomFunction* CudaExpressionUtilities::getFunctionPlaceholder(const TabulatedFunction& function) {
+Lepton::CustomFunction* ExpressionUtilities::getFunctionPlaceholder(const TabulatedFunction& function) {
    if (dynamic_cast<const Continuous1DFunction*>(&function) != NULL)
        return &fp1;
    if (dynamic_cast<const Continuous2DFunction*>(&function) != NULL)
@@ -919,11 +919,11 @@ Lepton::CustomFunction* CudaExpressionUtilities::getFunctionPlaceholder(const Ta
    throw OpenMMException("getFunctionPlaceholder: Unknown function type");
 }

-Lepton::CustomFunction* CudaExpressionUtilities::getPeriodicDistancePlaceholder() {
+Lepton::CustomFunction* ExpressionUtilities::getPeriodicDistancePlaceholder() {
    return &periodicDistance;
 }

-void CudaExpressionUtilities::callFunction(stringstream& out, string singleFn, string doubleFn, const string& arg, const string& tempType) {
+void ExpressionUtilities::callFunction(stringstream& out, string singleFn, string doubleFn, const string& arg, const string& tempType) {
    bool isDouble = (tempType[0] == 'd');
    bool isVector = (tempType[tempType.size()-1] == '3');
    string fn = (isDouble ? doubleFn : singleFn);
@@ -933,7 +933,7 @@ void CudaExpressionUtilities::callFunction(stringstream& out, string singleFn, s
        out<<fn<<"("<<arg<<")";
 }

-void CudaExpressionUtilities::callFunction2(stringstream& out, string singleFn, string doubleFn, const string& arg1, const string& arg2, const string& tempType) {
+void ExpressionUtilities::callFunction2(stringstream& out, string singleFn, string doubleFn, const string& arg1, const string& arg2, const string& tempType) {
    bool isDouble = (tempType[0] == 'd');
    bool isVector = (tempType[tempType.size()-1] == '3');
    string fn = (isDouble ? doubleFn : singleFn);

--- a/platforms/common/src/IntegrationUtilities.cpp
+++ b/platforms/common/src/IntegrationUtilities.cpp
+/* -------------------------------------------------------------------------- *
+ *                                   OpenMM                                   *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009-2019 Stanford University and the Authors.      *
+ * Authors: Peter Eastman                                                     *
+ * Contributors:                                                              *
+ *                                                                            *
+ * This program is free software: you can redistribute it and/or modify       *
+ * it under the terms of the GNU Lesser General Public License as published   *
+ * by the Free Software Foundation, either version 3 of the License, or       *
+ * (at your option) any later version.                                        *
+ *                                                                            *
+ * This program is distributed in the hope that it will be useful,            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ * GNU Lesser General Public License for more details.                        *
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * -------------------------------------------------------------------------- */
+
+#include "openmm/common/IntegrationUtilities.h"
+#include "openmm/common/ComputeContext.h"
+#include "CommonKernelSources.h"
+#include "openmm/internal/OSRngSeed.h"
+#include "openmm/HarmonicAngleForce.h"
+#include "openmm/VirtualSite.h"
+#include "quern.h"
+#include "ReferenceCCMAAlgorithm.h"
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <map>
+
+using namespace OpenMM;
+using namespace std;
+
+/**
+ * A candidate SHAKE cluster: one central atom constrained to up to three peripheral atoms,
+ * all at the same distance and with the same inverse mass.
+ */
+struct IntegrationUtilities::ShakeCluster {
+    int centralID;
+    int peripheralID[3];
+    int size;       // number of peripheral atoms recorded so far
+    bool valid;     // false once the cluster is found to be unusable
+    double distance;
+    double centralInvMass, peripheralInvMass;
+    // Every member is initialized (in declaration order) so that copying a cluster never
+    // reads an indeterminate value.
+    ShakeCluster() : centralID(-1), size(0), valid(true), distance(0.0), centralInvMass(0.0), peripheralInvMass(0.0) {
+        peripheralID[0] = peripheralID[1] = peripheralID[2] = -1;
+    }
+    ShakeCluster(int centralID, double invMass) : centralID(centralID), size(0), valid(true), distance(0.0), centralInvMass(invMass), peripheralInvMass(0.0) {
+        peripheralID[0] = peripheralID[1] = peripheralID[2] = -1;
+    }
+    /**
+     * Add a peripheral atom.  The cluster is marked invalid instead if it already has three
+     * peripheral atoms, or if the distance or inverse mass differs from the values already
+     * recorded by a relative amount of more than 1e-8.
+     */
+    void addAtom(int id, double dist, double invMass) {
+        if (size == 3 || (size > 0 && std::abs(dist-distance)/distance > 1e-8) || (size > 0 && std::abs(invMass-peripheralInvMass)/peripheralInvMass > 1e-8))
+            valid = false;
+        else {
+            peripheralID[size++] = id;
+            distance = dist;
+            peripheralInvMass = invMass;
+        }
+    }
+    /**
+     * Mark this cluster invalid, flag all of its atoms in invalidForShake, and recursively
+     * invalidate any still-valid cluster whose central atom is one of this cluster's
+     * peripheral atoms.
+     */
+    void markInvalid(map<int, ShakeCluster>& allClusters, vector<bool>& invalidForShake)
+    {
+        // Clearing valid first is what terminates the recursion on cyclic references.
+        valid = false;
+        invalidForShake[centralID] = true;
+        for (int i = 0; i < size; i++) {
+            invalidForShake[peripheralID[i]] = true;
+            map<int, ShakeCluster>::iterator otherCluster = allClusters.find(peripheralID[i]);
+            if (otherCluster != allClusters.end() && otherCluster->second.valid)
+                otherCluster->second.markInvalid(allClusters, invalidForShake);
+        }
+    }
+};
+
+/**
+ * Comparison functor that orders positions in the constraints list by the first atom of the
+ * referenced constraint, then by its second atom.
+ */
+struct IntegrationUtilities::ConstraintOrderer {
+    // std::binary_function was deprecated in C++11 and removed in C++17; these typedefs
+    // provide the same nested types without depending on it.
+    typedef int first_argument_type;
+    typedef int second_argument_type;
+    typedef bool result_type;
+    const vector<int>& atom1;
+    const vector<int>& atom2;
+    const vector<int>& constraints;
+    ConstraintOrderer(const vector<int>& atom1, const vector<int>& atom2, const vector<int>& constraints) : atom1(atom1), atom2(atom2), constraints(constraints) {
+    }
+    /**
+     * Return true if the constraint at position x sorts before the one at position y.
+     */
+    bool operator()(int x, int y) const {
+        const int ix = constraints[x];
+        const int iy = constraints[y];
+        if (atom1[ix] != atom1[iy])
+            return atom1[ix] < atom1[iy];
+        return atom2[ix] < atom2[iy];
+    }
+};
+
+IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System& system) : context(context),
+        randomPos(0), hasOverlappingVsites(false) {
+    // Create workspace arrays.
+
+    lastStepSize = mm_double2(0.0, 0.0);
+    if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
+        posDelta.initialize<mm_double4>(context, context.getPaddedNumAtoms(), "posDelta");
+        vector<mm_double4> deltas(posDelta.getSize(), mm_double4(0.0, 0.0, 0.0, 0.0));
+        posDelta.upload(deltas);
+        stepSize.initialize<mm_double2>(context, 1, "stepSize");
+        stepSize.upload(&lastStepSize);
+    }
+    else {
+        posDelta.initialize<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
+        vector<mm_float4> deltas(posDelta.getSize(), mm_float4(0.0f, 0.0f, 0.0f, 0.0f));
+        posDelta.upload(deltas);
+        stepSize.initialize<mm_float2>(context, 1, "stepSize");
+        mm_float2 lastStepSizeFloat = mm_float2(0.0f, 0.0f);
+        stepSize.upload(&lastStepSizeFloat);
+    }
+
+    // Record the set of constraints and how many constraints each atom is involved in.
+
+    vector<int> atom1;
+    vector<int> atom2;
+    vector<double> distance;
+    vector<int> constraintCount(context.getNumAtoms(), 0);
+    for (int i = 0; i < system.getNumConstraints(); i++) {
+        int p1, p2;
+        double d;
+        system.getConstraintParameters(i, p1, p2, d);
+        if (system.getParticleMass(p1) != 0 || system.getParticleMass(p2) != 0) {
+            atom1.push_back(p1);
+            atom2.push_back(p2);
+            distance.push_back(d);
+            constraintCount[p1]++;
+            constraintCount[p2]++;
+        }
+    }
+
+    // Identify clusters of three atoms that can be treated with SETTLE.  First, for every
+    // atom that might be part of such a cluster, make a list of the two other atoms it is
+    // connected to.
+
+    int numAtoms = system.getNumParticles();
+    vector<map<int, float> > settleConstraints(numAtoms);
+    for (int i = 0; i < (int)atom1.size(); i++) {
+        if (constraintCount[atom1[i]] == 2 && constraintCount[atom2[i]] == 2) {
+            settleConstraints[atom1[i]][atom2[i]] = (float) distance[i];
+            settleConstraints[atom2[i]][atom1[i]] = (float) distance[i];
+        }
+    }
+
+    // Now remove the ones that don't actually form closed loops of three atoms.
+
+    vector<int> settleClusters;
+    for (int i = 0; i < (int)settleConstraints.size(); i++) {
+        if (settleConstraints[i].size() == 2) {
+            int partner1 = settleConstraints[i].begin()->first;
+            int partner2 = (++settleConstraints[i].begin())->first;
+            if (settleConstraints[partner1].size() != 2 || settleConstraints[partner2].size() != 2 ||
+                    settleConstraints[partner1].find(partner2) == settleConstraints[partner1].end())
+                settleConstraints[i].clear();
+            else if (i < partner1 && i < partner2)
+                settleClusters.push_back(i);
+        }
+        else
+            settleConstraints[i].clear();
+    }
+
+    // Record the SETTLE clusters.
+
+    vector<bool> isShakeAtom(numAtoms, false);
+    if (settleClusters.size() > 0) {
+        vector<mm_int4> atoms;
+        vector<mm_float2> params;
+        for (int i = 0; i < (int) settleClusters.size(); i++) {
+            int atom1 = settleClusters[i];
+            int atom2 = settleConstraints[atom1].begin()->first;
+            int atom3 = (++settleConstraints[atom1].begin())->first;
+            float dist12 = settleConstraints[atom1].find(atom2)->second;
+            float dist13 = settleConstraints[atom1].find(atom3)->second;
+            float dist23 = settleConstraints[atom2].find(atom3)->second;
+            if (dist12 == dist13) {
+                // atom1 is the central atom
+                atoms.push_back(mm_int4(atom1, atom2, atom3, 0));
+                params.push_back(mm_float2(dist12, dist23));
+            }
+            else if (dist12 == dist23) {
+                // atom2 is the central atom
+                atoms.push_back(mm_int4(atom2, atom1, atom3, 0));
+                params.push_back(mm_float2(dist12, dist13));
+            }
+            else if (dist13 == dist23) {
+                // atom3 is the central atom
+                atoms.push_back(mm_int4(atom3, atom1, atom2, 0));
+                params.push_back(mm_float2(dist13, dist12));
+            }
+            else
+                continue; // We can't handle this with SETTLE
+            isShakeAtom[atom1] = true;
+            isShakeAtom[atom2] = true;
+            isShakeAtom[atom3] = true;
+        }
+        if (atoms.size() > 0) {
+            settleAtoms.initialize<mm_int4>(context, atoms.size(), "settleAtoms");
+            settleParams.initialize<mm_float2>(context, params.size(), "settleParams");
+            settleAtoms.upload(atoms);
+            settleParams.upload(params);
+        }
+    }
+
+    // Find clusters consisting of a central atom with up to three peripheral atoms.
+
+    map<int, ShakeCluster> clusters;
+    vector<bool> invalidForShake(numAtoms, false);
+    for (int i = 0; i < (int) atom1.size(); i++) {
+        if (isShakeAtom[atom1[i]])
+            continue; // This is being taken care of with SETTLE.
+
+        // Determine which is the central atom.
+
+        bool firstIsCentral;
+        if (constraintCount[atom1[i]] > 1)
+            firstIsCentral = true;
+        else if (constraintCount[atom2[i]] > 1)
+            firstIsCentral = false;
+        else if (atom1[i] < atom2[i])
+            firstIsCentral = true;
+        else
+            firstIsCentral = false;
+        int centralID, peripheralID;
+        if (firstIsCentral) {
+            centralID = atom1[i];
+            peripheralID = atom2[i];
+        }
+        else {
+            centralID = atom2[i];
+            peripheralID = atom1[i];
+        }
+
+        // Add it to the cluster.
+
+        if (clusters.find(centralID) == clusters.end()) {
+            clusters[centralID] = ShakeCluster(centralID, 1.0/system.getParticleMass(centralID));
+        }
+        ShakeCluster& cluster = clusters[centralID];
+        cluster.addAtom(peripheralID, distance[i], 1.0/system.getParticleMass(peripheralID));
+        if (constraintCount[peripheralID] != 1 || invalidForShake[atom1[i]] || invalidForShake[atom2[i]]) {
+            cluster.markInvalid(clusters, invalidForShake);
+            map<int, ShakeCluster>::iterator otherCluster = clusters.find(peripheralID);
+            if (otherCluster != clusters.end() && otherCluster->second.valid)
+                otherCluster->second.markInvalid(clusters, invalidForShake);
+        }
+    }
+    int validShakeClusters = 0;
+    for (map<int, ShakeCluster>::iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
+        ShakeCluster& cluster = iter->second;
+        if (cluster.valid) {
+            cluster.valid = !invalidForShake[cluster.centralID] && cluster.size == constraintCount[cluster.centralID];
+            for (int i = 0; i < cluster.size; i++)
+                if (invalidForShake[cluster.peripheralID[i]])
+                    cluster.valid = false;
+            if (cluster.valid)
+                ++validShakeClusters;
+        }
+    }
+
+    // Record the SHAKE clusters.
+
+    if (validShakeClusters > 0) {
+        vector<mm_int4> atoms;
+        vector<mm_float4> params;
+        int index = 0;
+        for (map<int, ShakeCluster>::const_iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
+            const ShakeCluster& cluster = iter->second;
+            if (!cluster.valid)
+                continue;
+            atoms.push_back(mm_int4(cluster.centralID, cluster.peripheralID[0], (cluster.size > 1 ? cluster.peripheralID[1] : -1), (cluster.size > 2 ? cluster.peripheralID[2] : -1)));
+            params.push_back(mm_float4((float) cluster.centralInvMass, (float) (0.5/(cluster.centralInvMass+cluster.peripheralInvMass)), (float) (cluster.distance*cluster.distance), (float) cluster.peripheralInvMass));
+            isShakeAtom[cluster.centralID] = true;
+            isShakeAtom[cluster.peripheralID[0]] = true;
+            if (cluster.size > 1)
+                isShakeAtom[cluster.peripheralID[1]] = true;
+            if (cluster.size > 2)
+                isShakeAtom[cluster.peripheralID[2]] = true;
+            ++index;
+        }
+        shakeAtoms.initialize<mm_int4>(context, atoms.size(), "shakeAtoms");
+        shakeParams.initialize<mm_float4>(context, params.size(), "shakeParams");
+        shakeAtoms.upload(atoms);
+        shakeParams.upload(params);
+    }
+
+    // Find connected constraints for CCMA.
+
+    vector<int> ccmaConstraints;
+    for (unsigned i = 0; i < atom1.size(); i++)
+        if (!isShakeAtom[atom1[i]])
+            ccmaConstraints.push_back(i);
+
+    // Record the connections between constraints.
+
+    int numCCMA = (int) ccmaConstraints.size();
+    if (numCCMA > 0) {
+        // Record information needed by ReferenceCCMAAlgorithm.
+        
+        vector<pair<int, int> > refIndices(numCCMA);
+        vector<double> refDistance(numCCMA);
+        for (int i = 0; i < numCCMA; i++) {
+            int index = ccmaConstraints[i];
+            refIndices[i] = make_pair(atom1[index], atom2[index]);
+            refDistance[i] = distance[index];
+        }
+        vector<double> refMasses(numAtoms);
+        for (int i = 0; i < numAtoms; ++i)
+            refMasses[i] = system.getParticleMass(i);
+
+        // Look up angles for CCMA.
+        
+        vector<ReferenceCCMAAlgorithm::AngleInfo> angles;
+        for (int i = 0; i < system.getNumForces(); i++) {
+            const HarmonicAngleForce* force = dynamic_cast<const HarmonicAngleForce*>(&system.getForce(i));
+            if (force != NULL) {
+                for (int j = 0; j < force->getNumAngles(); j++) {
+                    int atom1, atom2, atom3;
+                    double angle, k;
+                    force->getAngleParameters(j, atom1, atom2, atom3, angle, k);
+                    angles.push_back(ReferenceCCMAAlgorithm::AngleInfo(atom1, atom2, atom3, angle));
+                }
+            }
+        }
+        
+        // Create a ReferenceCCMAAlgorithm.  It will build and invert the constraint matrix for us.
+        
+        ReferenceCCMAAlgorithm ccma(numAtoms, numCCMA, refIndices, refDistance, refMasses, angles, 0.1);
+        vector<vector<pair<int, double> > > matrix = ccma.getMatrix();
+        int maxRowElements = 0;
+        for (unsigned i = 0; i < matrix.size(); i++)
+            maxRowElements = max(maxRowElements, (int) matrix[i].size());
+        maxRowElements++;
+
+        // Build the list of constraints for each atom.
+
+        vector<vector<int> > atomConstraints(context.getNumAtoms());
+        for (int i = 0; i < numCCMA; i++) {
+            atomConstraints[atom1[ccmaConstraints[i]]].push_back(i);
+            atomConstraints[atom2[ccmaConstraints[i]]].push_back(i);
+        }
+        int maxAtomConstraints = 0;
+        for (unsigned i = 0; i < atomConstraints.size(); i++)
+            maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size());
+
+        // Sort the constraints.
+
+        vector<int> constraintOrder(numCCMA);
+        for (int i = 0; i < numCCMA; ++i)
+            constraintOrder[i] = i;
+        sort(constraintOrder.begin(), constraintOrder.end(), ConstraintOrderer(atom1, atom2, ccmaConstraints));
+        vector<int> inverseOrder(numCCMA);
+        for (int i = 0; i < numCCMA; ++i)
+            inverseOrder[constraintOrder[i]] = i;
+        for (int i = 0; i < (int)matrix.size(); ++i)
+            for (int j = 0; j < (int)matrix[i].size(); ++j)
+                matrix[i][j].first = inverseOrder[matrix[i][j].first];
+
+        // Record the CCMA data structures.
+
+        ccmaAtoms.initialize<mm_int2>(context, numCCMA, "CcmaAtoms");
+        ccmaAtomConstraints.initialize<int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
+        ccmaNumAtomConstraints.initialize<int>(context, numAtoms, "CcmaAtomConstraintsIndex");
+        ccmaConstraintMatrixColumn.initialize<int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
+        ccmaConverged.initialize<int>(context, 2, "ccmaConverged");
+        vector<mm_int2> atomsVec(ccmaAtoms.getSize());
+        vector<int> atomConstraintsVec(ccmaAtomConstraints.getSize());
+        vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
+        vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
+        int elementSize = (context.getUseDoublePrecision() || context.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
+        ccmaDistance.initialize(context, numCCMA, 4*elementSize, "CcmaDistance");
+        ccmaDelta1.initialize(context, numCCMA, elementSize, "CcmaDelta1");
+        ccmaDelta2.initialize(context, numCCMA, elementSize, "CcmaDelta2");
+        ccmaReducedMass.initialize(context, numCCMA, elementSize, "CcmaReducedMass");
+        ccmaConstraintMatrixValue.initialize(context, numCCMA*maxRowElements, elementSize, "ConstraintMatrixValue");
+        vector<mm_double4> distanceVec(ccmaDistance.getSize());
+        vector<double> reducedMassVec(ccmaReducedMass.getSize());
+        vector<double> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
+        for (int i = 0; i < numCCMA; i++) {
+            int index = constraintOrder[i];
+            int c = ccmaConstraints[index];
+            atomsVec[i].x = atom1[c];
+            atomsVec[i].y = atom2[c];
+            distanceVec[i].w = distance[c];
+            reducedMassVec[i] = (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
+            for (unsigned int j = 0; j < matrix[index].size(); j++) {
+                constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
+                constraintMatrixValueVec[i+j*numCCMA] = matrix[index][j].second;
+            }
+            constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
+        }
+        ccmaDistance.upload(distanceVec, true);
+        ccmaReducedMass.upload(reducedMassVec, true);
+        ccmaConstraintMatrixValue.upload(constraintMatrixValueVec, true);
+        for (unsigned int i = 0; i < atomConstraints.size(); i++) {
+            numAtomConstraintsVec[i] = atomConstraints[i].size();
+            for (unsigned int j = 0; j < atomConstraints[i].size(); j++) {
+                bool forward = (atom1[ccmaConstraints[atomConstraints[i][j]]] == i);
+                atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
+            }
+        }
+        ccmaAtoms.upload(atomsVec);
+        ccmaAtomConstraints.upload(atomConstraintsVec);
+        ccmaNumAtomConstraints.upload(numAtomConstraintsVec);
+        ccmaConstraintMatrixColumn.upload(constraintMatrixColumnVec);
+    }
+    
+    // Build the list of virtual sites.
+    
+    vector<mm_int4> vsite2AvgAtomVec;
+    vector<mm_double2> vsite2AvgWeightVec;
+    vector<mm_int4> vsite3AvgAtomVec;
+    vector<mm_double4> vsite3AvgWeightVec;
+    vector<mm_int4> vsiteOutOfPlaneAtomVec;
+    vector<mm_double4> vsiteOutOfPlaneWeightVec;
+    vector<int> vsiteLocalCoordsIndexVec;
+    vector<int> vsiteLocalCoordsAtomVec;
+    vector<int> vsiteLocalCoordsStartVec;
+    vector<double> vsiteLocalCoordsWeightVec;
+    vector<mm_double4> vsiteLocalCoordsPosVec;
+    for (int i = 0; i < numAtoms; i++) {
+        if (system.isVirtualSite(i)) {
+            if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
+                // A two particle average.
+                
+                const TwoParticleAverageSite& site = dynamic_cast<const TwoParticleAverageSite&>(system.getVirtualSite(i));
+                vsite2AvgAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), 0));
+                vsite2AvgWeightVec.push_back(mm_double2(site.getWeight(0), site.getWeight(1)));
+            }
+            else if (dynamic_cast<const ThreeParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
+                // A three particle average.
+                
+                const ThreeParticleAverageSite& site = dynamic_cast<const ThreeParticleAverageSite&>(system.getVirtualSite(i));
+                vsite3AvgAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
+                vsite3AvgWeightVec.push_back(mm_double4(site.getWeight(0), site.getWeight(1), site.getWeight(2), 0.0));
+            }
+            else if (dynamic_cast<const OutOfPlaneSite*>(&system.getVirtualSite(i)) != NULL) {
+                // An out of plane site.
+                
+                const OutOfPlaneSite& site = dynamic_cast<const OutOfPlaneSite&>(system.getVirtualSite(i));
+                vsiteOutOfPlaneAtomVec.push_back(mm_int4(i, site.getParticle(0), site.getParticle(1), site.getParticle(2)));
+                vsiteOutOfPlaneWeightVec.push_back(mm_double4(site.getWeight12(), site.getWeight13(), site.getWeightCross(), 0.0));
+            }
+            else if (dynamic_cast<const LocalCoordinatesSite*>(&system.getVirtualSite(i)) != NULL) {
+                // A local coordinates site.
+                
+                const LocalCoordinatesSite& site = dynamic_cast<const LocalCoordinatesSite&>(system.getVirtualSite(i));
+                int numParticles = site.getNumParticles();
+                vector<double> origin, x, y;
+                site.getOriginWeights(origin);
+                site.getXWeights(x);
+                site.getYWeights(y);
+                vsiteLocalCoordsIndexVec.push_back(i);
+                vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
+                for (int j = 0; j < numParticles; j++) {
+                    vsiteLocalCoordsAtomVec.push_back(site.getParticle(j));
+                    vsiteLocalCoordsWeightVec.push_back(origin[j]);
+                    vsiteLocalCoordsWeightVec.push_back(x[j]);
+                    vsiteLocalCoordsWeightVec.push_back(y[j]);
+                }
+                Vec3 pos = site.getLocalPosition();
+                vsiteLocalCoordsPosVec.push_back(mm_double4(pos[0], pos[1], pos[2], 0.0));
+            }
+        }
+    }
+    vsiteLocalCoordsStartVec.push_back(vsiteLocalCoordsAtomVec.size());
+    int num2Avg = vsite2AvgAtomVec.size();
+    int num3Avg = vsite3AvgAtomVec.size();
+    int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
+    int numLocalCoords = vsiteLocalCoordsPosVec.size();
+    numVsites = num2Avg+num3Avg+numOutOfPlane+numLocalCoords;
+    vsite2AvgAtoms.initialize<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
+    vsite3AvgAtoms.initialize<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
+    vsiteOutOfPlaneAtoms.initialize<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
+    vsiteLocalCoordsIndex.initialize<int>(context, max(1, (int) vsiteLocalCoordsIndexVec.size()), "vsiteLocalCoordsIndex");
+    vsiteLocalCoordsAtoms.initialize<int>(context, max(1, (int) vsiteLocalCoordsAtomVec.size()), "vsiteLocalCoordsAtoms");
+    vsiteLocalCoordsStartIndex.initialize<int>(context, max(1, (int) vsiteLocalCoordsStartVec.size()), "vsiteLocalCoordsStartIndex");
+    if (num2Avg > 0)
+        vsite2AvgAtoms.upload(vsite2AvgAtomVec);
+    if (num3Avg > 0)
+        vsite3AvgAtoms.upload(vsite3AvgAtomVec);
+    if (numOutOfPlane > 0)
+        vsiteOutOfPlaneAtoms.upload(vsiteOutOfPlaneAtomVec);
+    if (numLocalCoords > 0) {
+        vsiteLocalCoordsIndex.upload(vsiteLocalCoordsIndexVec);
+        vsiteLocalCoordsAtoms.upload(vsiteLocalCoordsAtomVec);
+        vsiteLocalCoordsStartIndex.upload(vsiteLocalCoordsStartVec);
+    }
+    int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
+    vsite2AvgWeights.initialize(context, max(1, num2Avg), 2*elementSize, "vsite2AvgWeights");
+    vsite3AvgWeights.initialize(context, max(1, num3Avg), 4*elementSize, "vsite3AvgWeights");
+    vsiteOutOfPlaneWeights.initialize(context, max(1, numOutOfPlane), 4*elementSize, "vsiteOutOfPlaneWeights");
+    vsiteLocalCoordsWeights.initialize(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), elementSize, "vsiteLocalCoordsWeights");
+    vsiteLocalCoordsPos.initialize(context, max(1, (int) vsiteLocalCoordsPosVec.size()), 4*elementSize, "vsiteLocalCoordsPos");
+    if (num2Avg > 0)
+        vsite2AvgWeights.upload(vsite2AvgWeightVec, true);
+    if (num3Avg > 0)
+        vsite3AvgWeights.upload(vsite3AvgWeightVec, true);
+    if (numOutOfPlane > 0)
+        vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec, true);
+    if (numLocalCoords > 0) {
+        vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec, true);
+        vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec, true);
+    }
+
+    // If multiple virtual sites depend on the same particle, make sure the force distribution
+    // can be done safely.
+    
+    vector<int> atomCounts(numAtoms, 0);
+    for (int i = 0; i < numAtoms; i++)
+        if (system.isVirtualSite(i))
+            for (int j = 0; j < system.getVirtualSite(i).getNumParticles(); j++)
+                atomCounts[system.getVirtualSite(i).getParticle(j)]++;
+    for (int i = 0; i < numAtoms; i++)
+        if (atomCounts[i] > 1)
+            hasOverlappingVsites = true;
+    if (hasOverlappingVsites && !context.getSupports64BitGlobalAtomics())
+        throw OpenMMException("This device does not support 64 bit atomics.  Cannot have multiple virtual sites that depend on the same atom.");
+
+    // Create the kernels used by this class.
+
+    map<string, string> defines;
+    defines["NUM_CCMA_CONSTRAINTS"] = context.intToString(numCCMA);
+    defines["NUM_ATOMS"] = context.intToString(numAtoms);
+    defines["NUM_2_AVERAGE"] = context.intToString(num2Avg);
+    defines["NUM_3_AVERAGE"] = context.intToString(num3Avg);
+    defines["NUM_OUT_OF_PLANE"] = context.intToString(numOutOfPlane);
+    defines["NUM_LOCAL_COORDS"] = context.intToString(numLocalCoords);
+    defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
+    if (hasOverlappingVsites)
+        defines["HAS_OVERLAPPING_VSITES"] = "1";
+    ComputeProgram program = context.compileProgram(CommonKernelSources::integrationUtilities, defines);
+    settlePosKernel = program->createKernel("applySettleToPositions");
+    settleVelKernel = program->createKernel("applySettleToVelocities");
+    shakePosKernel = program->createKernel("applyShakeToPositions");
+    shakeVelKernel = program->createKernel("applyShakeToVelocities");
+    ccmaDirectionsKernel = program->createKernel("computeCCMAConstraintDirections");
+    ccmaPosForceKernel = program->createKernel("computeCCMAPositionConstraintForce");
+    ccmaVelForceKernel = program->createKernel("computeCCMAVelocityConstraintForce");
+    ccmaMultiplyKernel = program->createKernel("multiplyByCCMAConstraintMatrix");
+    ccmaUpdateKernel = program->createKernel("updateCCMAAtomPositions");
+    vsitePositionKernel = program->createKernel("computeVirtualSites");
+    vsiteForceKernel = program->createKernel("distributeVirtualSiteForces");
+    vsiteSaveForcesKernel = program->createKernel("saveDistributedForces");
+    randomKernel = program->createKernel("generateRandomNumbers");
+    timeShiftKernel = program->createKernel("timeShiftVelocities");
+
+    // Set arguments for virtual site kernels.
+
+    vsitePositionKernel->addArg(context.getPosq());
+    if (context.getUseMixedPrecision())
+        vsitePositionKernel->addArg(context.getPosqCorrection());
+    else
+        vsitePositionKernel->addArg(NULL);
+    vsitePositionKernel->addArg(vsite2AvgAtoms);
+    vsitePositionKernel->addArg(vsite2AvgWeights);
+    vsitePositionKernel->addArg(vsite3AvgAtoms);
+    vsitePositionKernel->addArg(vsite3AvgWeights);
+    vsitePositionKernel->addArg(vsiteOutOfPlaneAtoms);
+    vsitePositionKernel->addArg(vsiteOutOfPlaneWeights);
+    vsitePositionKernel->addArg(vsiteLocalCoordsIndex);
+    vsitePositionKernel->addArg(vsiteLocalCoordsAtoms);
+    vsitePositionKernel->addArg(vsiteLocalCoordsWeights);
+    vsitePositionKernel->addArg(vsiteLocalCoordsPos);
+    vsitePositionKernel->addArg(vsiteLocalCoordsStartIndex);
+    vsiteForceKernel->addArg(context.getPosq());
+    if (context.getUseMixedPrecision())
+        vsiteForceKernel->addArg(context.getPosqCorrection());
+    else
+        vsiteForceKernel->addArg(NULL);
+    vsiteForceKernel->addArg(); // Skip argument 2: the force array hasn't been created yet.
+    vsiteForceKernel->addArg(vsite2AvgAtoms);
+    vsiteForceKernel->addArg(vsite2AvgWeights);
+    vsiteForceKernel->addArg(vsite3AvgAtoms);
+    vsiteForceKernel->addArg(vsite3AvgWeights);
+    vsiteForceKernel->addArg(vsiteOutOfPlaneAtoms);
+    vsiteForceKernel->addArg(vsiteOutOfPlaneWeights);
+    vsiteForceKernel->addArg(vsiteLocalCoordsIndex);
+    vsiteForceKernel->addArg(vsiteLocalCoordsAtoms);
+    vsiteForceKernel->addArg(vsiteLocalCoordsWeights);
+    vsiteForceKernel->addArg(vsiteLocalCoordsPos);
+    vsiteForceKernel->addArg(vsiteLocalCoordsStartIndex);
+    for (int i = 0; i < 3; i++)
+        vsiteSaveForcesKernel->addArg();
+
+    // Set arguments for constraint kernels.
+
+    if (settleAtoms.isInitialized()) {
+        settlePosKernel->addArg(settleAtoms.getSize());
+        settlePosKernel->addArg();
+        settlePosKernel->addArg(context.getPosq());
+        settlePosKernel->addArg(posDelta);
+        settlePosKernel->addArg(context.getVelm());
+        settlePosKernel->addArg(settleAtoms);
+        settlePosKernel->addArg(settleParams);
+        if (context.getUseMixedPrecision())
+            settlePosKernel->addArg(context.getPosqCorrection());
+        settleVelKernel->addArg(settleAtoms.getSize());
+        settleVelKernel->addArg();
+        settleVelKernel->addArg(context.getPosq());
+        settleVelKernel->addArg(posDelta);
+        settleVelKernel->addArg(context.getVelm());
+        settleVelKernel->addArg(settleAtoms);
+        settleVelKernel->addArg(settleParams);
+        if (context.getUseMixedPrecision())
+            settleVelKernel->addArg(context.getPosqCorrection());
+    }
+    if (shakeAtoms.isInitialized()) {
+        shakePosKernel->addArg(shakeAtoms.getSize());
+        shakePosKernel->addArg();
+        shakePosKernel->addArg(context.getPosq());
+        shakePosKernel->addArg(posDelta);
+        shakePosKernel->addArg(shakeAtoms);
+        shakePosKernel->addArg(shakeParams);
+        if (context.getUseMixedPrecision())
+            shakePosKernel->addArg(context.getPosqCorrection());
+        shakeVelKernel->addArg(shakeAtoms.getSize());
+        shakeVelKernel->addArg();
+        shakeVelKernel->addArg(context.getPosq());
+        shakeVelKernel->addArg(context.getVelm());
+        shakeVelKernel->addArg(shakeAtoms);
+        shakeVelKernel->addArg(shakeParams);
+        if (context.getUseMixedPrecision())
+            shakeVelKernel->addArg(context.getPosqCorrection());
+    }
+    if (ccmaAtoms.isInitialized()) {
+        ccmaDirectionsKernel->addArg(ccmaAtoms);
+        ccmaDirectionsKernel->addArg(ccmaDistance);
+        ccmaDirectionsKernel->addArg(context.getPosq());
+        ccmaDirectionsKernel->addArg(ccmaConverged);
+        if (context.getUseMixedPrecision())
+            ccmaDirectionsKernel->addArg(context.getPosqCorrection());
+        ccmaPosForceKernel->addArg(ccmaAtoms);
+        ccmaPosForceKernel->addArg(ccmaDistance);
+        ccmaPosForceKernel->addArg(posDelta);
+        ccmaPosForceKernel->addArg(ccmaReducedMass);
+        ccmaPosForceKernel->addArg(ccmaDelta1);
+        ccmaPosForceKernel->addArg(ccmaConverged);
+        ccmaPosForceKernel->addArg();
+        ccmaPosForceKernel->addArg();
+        ccmaPosForceKernel->addArg();
+        ccmaVelForceKernel->addArg(ccmaAtoms);
+        ccmaVelForceKernel->addArg(ccmaDistance);
+        ccmaVelForceKernel->addArg(context.getVelm());
+        ccmaVelForceKernel->addArg(ccmaReducedMass);
+        ccmaVelForceKernel->addArg(ccmaDelta1);
+        ccmaVelForceKernel->addArg(ccmaConverged);
+        ccmaVelForceKernel->addArg();
+        ccmaVelForceKernel->addArg();
+        ccmaVelForceKernel->addArg();
+        ccmaMultiplyKernel->addArg(ccmaDelta1);
+        ccmaMultiplyKernel->addArg(ccmaDelta2);
+        ccmaMultiplyKernel->addArg(ccmaConstraintMatrixColumn);
+        ccmaMultiplyKernel->addArg(ccmaConstraintMatrixValue);
+        ccmaMultiplyKernel->addArg(ccmaConverged);
+        ccmaMultiplyKernel->addArg();
+        ccmaUpdateKernel->addArg(ccmaNumAtomConstraints);
+        ccmaUpdateKernel->addArg(ccmaAtomConstraints);
+        ccmaUpdateKernel->addArg(ccmaDistance);
+        ccmaUpdateKernel->addArg();
+        ccmaUpdateKernel->addArg(context.getVelm());
+        ccmaUpdateKernel->addArg(ccmaDelta1);
+        ccmaUpdateKernel->addArg(ccmaDelta2);
+        ccmaUpdateKernel->addArg(ccmaConverged);
+        ccmaUpdateKernel->addArg();
+    }
+
+    // Arguments for time shift kernel will be set later.
+    
+    for (int i = 0; i < 3; i++)
+        timeShiftKernel->addArg();
+}
+
+/**
+ * Record the step size to use for the next step.  The value is stored in both
+ * components of lastStepSize and uploaded to the stepSize array, but only when
+ * it differs from what is currently cached.
+ *
+ * @param size    the new step size
+ */
+void IntegrationUtilities::setNextStepSize(double size) {
+    // Skip the upload when both cached components already hold this value.
+
+    if (size == lastStepSize.x && size == lastStepSize.y)
+        return;
+    lastStepSize = mm_double2(size, size);
+    const bool storedAsDouble = (context.getUseDoublePrecision() || context.getUseMixedPrecision());
+    if (storedAsDouble) {
+        stepSize.upload(&lastStepSize);
+        return;
+    }
+
+    // In single precision the array holds floats, so convert before uploading.
+
+    mm_float2 singleStep = mm_float2((float) size, (float) size);
+    stepSize.upload(&singleStep);
+}
+
+/**
+ * Download the contents of the stepSize array into lastStepSize and return its
+ * second component.
+ *
+ * @return the y component of the downloaded step size pair
+ */
+double IntegrationUtilities::getLastStepSize() {
+    const bool storedAsDouble = (context.getUseDoublePrecision() || context.getUseMixedPrecision());
+    if (!storedAsDouble) {
+        // The array holds floats: download into a temporary and widen it.
+
+        mm_float2 singleStep;
+        stepSize.download(&singleStep);
+        lastStepSize = mm_double2(singleStep.x, singleStep.y);
+    }
+    else
+        stepSize.download(&lastStepSize);
+    return lastStepSize.y;
+}
+
+/**
+ * Apply constraints by forwarding to applyConstraintsImpl() with the velocity
+ * flag turned off.
+ *
+ * @param tol    the constraint tolerance, passed through unchanged
+ */
+void IntegrationUtilities::applyConstraints(double tol) {
+    const bool constrainVelocities = false;
+    applyConstraintsImpl(constrainVelocities, tol);
+}
+
+/**
+ * Apply constraints by forwarding to applyConstraintsImpl() with the velocity
+ * flag turned on.
+ *
+ * @param tol    the constraint tolerance, passed through unchanged
+ */
+void IntegrationUtilities::applyVelocityConstraints(double tol) {
+    const bool constrainVelocities = true;
+    applyConstraintsImpl(constrainVelocities, tol);
+}
+
+/**
+ * Run the virtual site position kernel with numVsites work items.  Nothing is
+ * executed when there are no virtual sites.
+ */
+void IntegrationUtilities::computeVirtualSites() {
+    if (numVsites <= 0)
+        return;
+    vsitePositionKernel->execute(numVsites);
+}
+
+/**
+ * Create and seed the random number arrays the first time this is called.
+ * Once the arrays exist, later calls only check that the same seed is being
+ * requested and throw an OpenMMException if it is not.
+ *
+ * @param randomNumberSeed    the seed to use; 0 means obtain one from osrngseed()
+ */
+void IntegrationUtilities::initRandomNumberGenerator(unsigned int randomNumberSeed) {
+    if (random.isInitialized()) {
+        // Already set up: the only thing left to do is verify the seed matches.
+
+        if (randomNumberSeed == lastSeed)
+            return;
+        throw OpenMMException("IntegrationUtilities::initRandomNumberGenerator(): Requested two different values for the random number seed");
+    }
+    lastSeed = randomNumberSeed;
+
+    // Allocate the arrays and register them as arguments of the generator kernel.
+    // randomPos starts at the end of the array, so no values are available until
+    // prepareRandomNumbers() runs the kernel.
+
+    random.initialize<mm_float4>(context, 4*context.getPaddedNumAtoms(), "random");
+    randomSeed.initialize<mm_int4>(context, context.getNumThreadBlocks()*64, "randomSeed");
+    randomPos = random.getSize();
+    randomKernel->addArg(random.getSize());
+    randomKernel->addArg(random);
+    randomKernel->addArg(randomSeed);
+
+    // Fill the seed array from a simple linear congruential sequence.
+
+    unsigned int state = randomNumberSeed;
+    if (state == 0)
+        state = (unsigned int) osrngseed(); // A seed of 0 means use a unique one
+    auto nextSeed = [&state]() {
+        state = (1664525*state + 1013904223) & 0xFFFFFFFF;
+        return state;
+    };
+    vector<mm_int4> seeds(randomSeed.getSize());
+    for (mm_int4& entry : seeds) {
+        entry.x = nextSeed();
+        entry.y = nextSeed();
+        entry.z = nextSeed();
+        entry.w = nextSeed();
+    }
+    randomSeed.upload(seeds);
+}
+
+/**
+ * Reserve a run of values in the random array, regenerating the array when the
+ * unused remainder is too short.
+ *
+ * @param numValues    how many consecutive entries of the random array are needed
+ * @return the index in the random array at which the reserved run starts
+ */
+int IntegrationUtilities::prepareRandomNumbers(int numValues) {
+    const bool mustRegenerate = !(randomPos+numValues <= random.getSize());
+    if (mustRegenerate) {
+        // Grow the array first if a single request exceeds its capacity, and
+        // tell the kernel about the new element count.
+
+        if (numValues > random.getSize()) {
+            random.resize(numValues);
+            randomKernel->setArg(0, numValues);
+        }
+        randomKernel->execute(random.getSize(), 64);
+
+        // The fresh run begins at the start of the array.
+
+        randomPos = numValues;
+        return 0;
+    }
+
+    // Enough unused values remain: hand out the next run and advance.
+
+    const int start = randomPos;
+    randomPos += numValues;
+    return start;
+}
+
+/**
+ * Write the state held by this object to a checkpoint stream.  The output is
+ * the number of Nose-Hoover chains, then for each chain its ID, length and
+ * state values, and finally (only if the random number arrays have been
+ * initialized) randomPos followed by the random and randomSeed arrays.
+ *
+ * @param stream    the stream to write the binary data to
+ */
+void IntegrationUtilities::createCheckpoint(ostream& stream) {
+    const bool storedAsDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
+    int chainCount = noseHooverChainState.size();
+    stream.write((char*) &chainCount, sizeof(int));
+    for (auto& entry : noseHooverChainState) {
+        // Header for this chain: its ID and the number of state elements.
+
+        int id = entry.first;
+        int length = entry.second.getSize();
+        stream.write((char*) &id, sizeof(int));
+        stream.write((char*) &length, sizeof(int));
+
+        // The element type of the chain state depends on the precision mode.
+
+        if (!storedAsDouble) {
+            vector<mm_float2> values;
+            entry.second.download(values);
+            stream.write((char*) values.data(), sizeof(mm_float2)*length);
+        }
+        else {
+            vector<mm_double2> values;
+            entry.second.download(values);
+            stream.write((char*) values.data(), sizeof(mm_double2)*length);
+        }
+    }
+
+    // The random number state is only saved once the generator has been set up.
+
+    if (random.isInitialized()) {
+        stream.write((char*) &randomPos, sizeof(int));
+        vector<mm_float4> randomValues;
+        random.download(randomValues);
+        stream.write((char*) &randomValues[0], sizeof(mm_float4)*random.getSize());
+        vector<mm_int4> seedValues;
+        randomSeed.download(seedValues);
+        stream.write((char*) &seedValues[0], sizeof(mm_int4)*randomSeed.getSize());
+    }
+}
+
+/**
+ * Restore the state written by createCheckpoint().  All existing Nose-Hoover
+ * chain arrays are discarded and rebuilt from the stream.  The random number
+ * state is read only if the random number arrays have been initialized.
+ *
+ * @param stream    the stream to read the binary data from
+ */
+void IntegrationUtilities::loadCheckpoint(istream& stream) {
+    const bool storedAsDouble = context.getUseDoublePrecision() || context.getUseMixedPrecision();
+    int chainCount;
+    stream.read((char*) &chainCount, sizeof(int));
+    noseHooverChainState.clear();
+    for (int chain = 0; chain < chainCount; chain++) {
+        // Header for this chain: its ID and the number of state elements.
+
+        int id, length;
+        stream.read((char*) &id, sizeof(int));
+        stream.read((char*) &length, sizeof(int));
+
+        // Start from a fresh array, then size it for the current precision mode.
+
+        noseHooverChainState[id] = ComputeArray();
+        auto& chainArray = noseHooverChainState[id];
+        if (!storedAsDouble) {
+            chainArray.initialize<mm_float2>(context, length, "chainState" + to_string(id));
+            vector<mm_float2> values(length);
+            stream.read((char*) &values[0], sizeof(mm_float2)*length);
+            chainArray.upload(values);
+        }
+        else {
+            chainArray.initialize<mm_double2>(context, length, "chainState" + to_string(id));
+            vector<mm_double2> values(length);
+            stream.read((char*) &values[0], sizeof(mm_double2)*length);
+            chainArray.upload(values);
+        }
+    }
+
+    // The random number state is only present once the generator has been set up.
+
+    if (random.isInitialized()) {
+        stream.read((char*) &randomPos, sizeof(int));
+        vector<mm_float4> randomValues(random.getSize());
+        stream.read((char*) &randomValues[0], sizeof(mm_float4)*random.getSize());
+        random.upload(randomValues);
+        vector<mm_int4> seedValues(randomSeed.getSize());
+        stream.read((char*) &seedValues[0], sizeof(mm_int4)*randomSeed.getSize());
+        randomSeed.upload(seedValues);
+    }
+}
+
+/**
+ * Compute the kinetic energy from the velocity array.  When timeShift is
+ * nonzero the velocities are first backed up into posDelta, shifted by the
+ * time shift kernel and constrained, and then restored after the sum is done.
+ *
+ * @param timeShift    the amount by which to shift the velocities in time
+ * @return half the sum of (vx^2+vy^2+vz^2)/w over all particles with nonzero w
+ *         (w is presumably the inverse mass -- confirm against the velm layout)
+ */
+double IntegrationUtilities::computeKineticEnergy(double timeShift) {
+    const int particleCount = context.getNumAtoms();
+    const bool shifted = (timeShift != 0);
+    if (shifted) {
+        // Back up the velocities in posDelta, since they are about to be modified.
+
+        context.getVelm().copyTo(posDelta);
+
+        // Run the time shift kernel, then apply the velocity constraints.
+
+        timeShiftKernel->setArg(0, context.getVelm());
+        timeShiftKernel->setArg(1, context.getLongForceBuffer());
+        if (!context.getUseDoublePrecision())
+            timeShiftKernel->setArg(2, (float) timeShift);
+        else
+            timeShiftKernel->setArg(2, timeShift);
+        timeShiftKernel->execute(particleCount);
+        applyConstraintsImpl(true, 1e-4);
+    }
+
+    // Sum the per-particle contributions, skipping particles whose w is zero.
+
+    double total = 0.0;
+    const bool storedAsDouble = (context.getUseDoublePrecision() || context.getUseMixedPrecision());
+    if (!storedAsDouble) {
+        vector<mm_float4> velocities;
+        context.getVelm().download(velocities);
+        for (int p = 0; p < particleCount; p++) {
+            mm_float4 v = velocities[p];
+            if (v.w == 0)
+                continue;
+            total += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
+        }
+    }
+    else {
+        vector<mm_double4> velocities;
+        context.getVelm().download(velocities);
+        for (int p = 0; p < particleCount; p++) {
+            mm_double4 v = velocities[p];
+            if (v.w == 0)
+                continue;
+            total += (v.x*v.x+v.y*v.y+v.z*v.z)/v.w;
+        }
+    }
+
+    // Put the original velocities back if they were shifted.
+
+    if (shifted)
+        posDelta.copyTo(context.getVelm());
+    return 0.5*total;
+}