Mods for GB/VI switching of Born radii

a2674e83 · Mark Friedrichs · 38839771 · a2674e83 · a2674e83 · a2674e83
Commit a2674e83 authored Jun 17, 2011 by Mark Friedrichs
8 changed files
--- a/openmmapi/include/openmm/GBVIForce.h
+++ b/openmmapi/include/openmm/GBVIForce.h
-#ifndef OPENMM_GBVIFORCEFIELD_H_
+Vim: Warning: Output is not to a terminal
-#define OPENMM_GBVIFORCEFIELD_H_
+[?1049h[?1h=[1;59r[?12;25h[?12l[?25h[27m[m[H[2J[?25l[59;1H"svn-commit.tmp" 15L, 601C[1;1H[33m  1 
+  2 [m[32m--This line, and those below, will be ignored--[m
-/* -------------------------------------------------------------------------- *
+[33m  3 
- *                                   OpenMM                                   *
+  4 [m[1m[35mM    plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp[m
- * -------------------------------------------------------------------------- *
+[33m  5 [m[1m[35mM    plugins/freeEnergy/platforms/reference/src/gbsa/CpuGBVISoftcore.cpp[m
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+[33m  6 [m[1m[35mM    openmmapi/include/openmm/GBVIForce.h[m
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+[33m  7 [m[1m[35mM    openmmapi/src/GBVIForce.cpp[m
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+[33m  8 [m[1m[35mM    olla/src/Platform.cpp[m
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+[33m  9 [m[1m[35mM    platforms/opencl/src/OpenCLContext.h[m
- *                                                                            *
+[33m 10 [m[1m[35mM    platforms/cuda/src/CudaKernels.cpp[m
- * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
+[33m 11 [m[1m[35mM    platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu[m
- * Authors: Peter Eastman                                                     *
+[33m 12 [m[1m[35mM    platforms/cuda/src/kernels/gputypes.h[m
- * Contributors:                                                              *
+[33m 13 [m[1m[35mM    platforms/cuda/src/kernels/cudatypes.h[m
- *                                                                            *
+[33m 14 [m[1m[35mM    platforms/cuda/src/kernels/kForces.cu[m
- * Permission is hereby granted, free of charge, to any person obtaining a    *
+[33m 15 [m[1m[35mM    platforms/cuda/src/kernels/gpu.cpp[m
- * copy of this software and associated documentation files (the "Software"), *
+[1m[34m~                                                                                                                                                                                                                                         [17;1H~                                                                                                                                                                                                                                         [18;1H~                                                                                                                                                                                                                                         [19;1H~                                                                                                                                                                                                                                         [20;1H~                                                                                                                                                                                                                                         [21;1H~                                                                                                                                                                                                                                         [22;1H~                                                                                                                                                                                                                                         [23;1H~                                                                                                                                                                                                                                         [24;1H~                                                                       
                                                                                                                                                                  [25;1H~                                                                                                                                                                                                                                         [26;1H~                                                                                                                                                                                                                                         [27;1H~                                                                                                                                                                                                                                         [28;1H~                                                                                                                                                                                                                                         [29;1H~                                                                                                                                                                                                                                         [30;1H~                                                                                                                                                                                                                                         [31;1H~                                                                                                                                                                                                                                         [32;1H~                                                                                                                                                       
                                                                                  [33;1H~                                                                                                                                                                                                                                         [34;1H~                                                                                                                                                                                                                                         [35;1H~                                                                                                                                                                                                                                         [36;1H~                                                                                                                                                                                                                                         [37;1H~                                                                                                                                                                                                                                         [38;1H~                                                                                                                                                                                                                                         [39;1H~                                                                                                                                                                                                                                         [40;1H~                                                                                                                                                                                                                                       
  [41;1H~                                                                                                                                                                                                                                         [42;1H~                                                                                                                                                                                                                                         [43;1H~                                                                                                                                                                                                                                         [44;1H~                                                                                                                                                                                                                                         [45;1H~                                                                                                                                                                                                                                         [46;1H~                                                                                                                                                                                                                                         [47;1H~                                                                                                                                                                                                                                         [48;1H~                                                                                                                                                                                                                                         [49;1H~                                                                       
                                                                                                                                                                  [50;1H~                                                                                                                                                                                                                                         [51;1H~                                                                                                                                                                                                                                         [52;1H~                                                                                                                                                                                                                                         [53;1H~                                                                                                                                                                                                                                         [54;1H~                                                                                                                                                                                                                                         [55;1H~                                                                                                                                                                                                                                         [56;1H~                                                                                                                                                                                                                                         [57;1H~                                                                                                                                                       
                                                                                  [58;1H~                                                                                                                                                                                                                                         [m[59;217H1,0-1[9CAll[1;5H[?12l[?25h[?25l[59;1H[K[59;1H:[?12l[?25hq![?25l[59;1H[K[59;1H[?1l>[?12l[?25h[?1049l
- * to deal in the Software without restriction, including without limitation  *
+Log message unchanged or not specified
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+a)bort, c)ontinue, e)dit
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-#include "Force.h"
-#include <vector>
-#include "internal/windowsExport.h"
-namespace OpenMM {
-/**
- * This class implements an implicit solvation force using the GB/VI model.
- * <p>
- * To use this class, create a GBVIForce object, then call addParticle() once for each particle in the
- * System to define its parameters.  The number of particles for which you define GB/VI parameters must
- * be exactly equal to the number of particles in the System, or else an exception will be thrown when you
- * try to create a Context.  After a particle has been added, you can modify its force field parameters
- * by calling setParticleParameters().
- */
-class OPENMM_EXPORT GBVIForce : public Force {
-public:
-    /** 
-     * This is an enumeration of the different methods that may be used for handling long range nonbonded forces.
-     */
-    enum NonbondedMethod {
-        /**
-         * No cutoff is applied to nonbonded interactions.  The full set of N^2 interactions is computed exactly.
-         * This necessarily means that periodic boundary conditions cannot be used.  This is the default.
-         */
-        NoCutoff = 0,
-        /**
-         * Interactions beyond the cutoff distance are ignored.
-         */
-        CutoffNonPeriodic = 1,
-        /**
-         * Periodic boundary conditions are used, so that each particle interacts only with the nearest periodic copy of
-         * each other particle.  Interactions beyond the cutoff distance are ignored.
-         */
-        CutoffPeriodic = 2,
-    };  
-    /*
-     * Create a GBVIForce.
-     */
-    GBVIForce();
-    /**
-     * Get the number of particles in the system.
-     */
-    int getNumParticles() const {
-        return particles.size();
-    }
-    /**
-     * Add the GB/VI parameters for a particle.  This should be called once for each particle
-     * in the System.  When it is called for the i'th time, it specifies the parameters for the i'th particle.
-     *
-     * @param charge         the charge of the particle, measured in units of the proton charge
-     * @param radius         the GB/VI radius of the particle, measured in nm
-     * @param gamma          the gamma parameter
-     * @return the index of the particle that was added
-     */
-    int addParticle(double charge, double radius, double gamma);
-    /**
-     * Get the force field parameters for a particle.
-     * 
-     * @param index          the index of the particle for which to get parameters
-     * @param charge         the charge of the particle, measured in units of the proton charge
-     * @param radius         the GBSA radius of the particle, measured in nm
-     * @param gamma          the gamma parameter
-     */
-    void getParticleParameters(int index, double& charge, double& radius, double& gamma) const;
-    /**
-     * Set the force field parameters for a particle.
-     * 
-     * @param index          the index of the particle for which to set parameters
-     * @param charge         the charge of the particle, measured in units of the proton charge
-     * @param radius         the GB/VI radius of the particle, measured in nm
-     * @param gamma          the gamma parameter
-     */
-    void setParticleParameters(int index, double charge, double radius, double gamma);
-    /**
-     * Add a bond 
-     *
-     * @param particle1 the index of the first particle 
-     * @param particle2 the index of the second particle
-     * @param distance  the distance between the two particles, measured in nm
-     * @return the index of the bond that was added
-     */
-    int addBond(int particle1, int particle2, double distance);
-    /** 
-     * Get the parameters defining a bond
-     * 
-     * @param index     the index of the bond for which to get parameters
-     * @param particle1 the index of the first particle involved in the bond
-     * @param particle2 the index of the second particle involved in the bond
-     * @param distance  the distance between the two particles, measured in nm
-     */
-    void getBondParameters(int index, int& particle1, int& particle2, double& distance) const;
-    /**
-     * Set 1-2 bonds
-     * 
-     * @param index          index of the bond for which to set parameters
-     * @param particle1      index of first atom in bond
-     * @param particle2      index of second atom in bond
-     * @param bondLength     bond length
-     */
-    void setBondParameters( int index, int particle1, int particle2, double bondLength);
-    /** 
-     * Get number of bonds
-     * 
-     * @return number of bonds
-     */
-    int getNumBonds( void ) const;
-    /**
-     * Get the dielectric constant for the solvent.
-     */
-    double getSolventDielectric() const {
-        return solventDielectric;
-    }
-    /**
-     * Set the dielectric constant for the solvent.
-     */
-    void setSolventDielectric(double dielectric) {
-        solventDielectric = dielectric;
-    }
-    /**
-     * Get the dielectric constant for the solute.
-     */
-    double getSoluteDielectric() const {
-        return soluteDielectric;
-    }
-    /**
-     * Set the dielectric constant for the solute.
-     */
-    void setSoluteDielectric(double dielectric) {
-        soluteDielectric = dielectric;
-    }
-    /** 
-     * Get the method used for handling long range nonbonded interactions.
-     */
-    NonbondedMethod getNonbondedMethod() const;
-    /** 
-     * Set the method used for handling long range nonbonded interactions.
-     */
-    void setNonbondedMethod(NonbondedMethod method);
-    /** 
-     * Get the cutoff distance (in nm) being used for nonbonded interactions.  If the NonbondedMethod in use
-     * is NoCutoff, this value will have no effect.
-     *
-     * @return the cutoff distance, measured in nm
-     */
-    double getCutoffDistance() const;
-    /** 
-     * Set the cutoff distance (in nm) being used for nonbonded interactions.  If the NonbondedMethod in use
-     * is NoCutoff, this value will have no effect.
-     *
-     * @param distance    the cutoff distance, measured in nm
-     */
-    void setCutoffDistance(double distance);
-protected:
-    ForceImpl* createImpl();
-private:
-    class ParticleInfo;
-    NonbondedMethod nonbondedMethod;
-    double cutoffDistance, solventDielectric, soluteDielectric;
-    class BondInfo;
-    std::vector<ParticleInfo> particles;
-    std::vector<BondInfo> bonds;
-};
-/**
- * This is an internal class used to record information about a particle.
- * @private
- */
-class GBVIForce::ParticleInfo {
-public:
-    double charge, radius, gamma;
-    ParticleInfo() {
-        charge = radius = gamma = 0.0;
-    }
-    ParticleInfo(double charge, double radius, double gamma) :
-        charge(charge), radius(radius), gamma(gamma) {
-    }
-};
-/**
- * This is an internal class used to record information about a bond.
- * @private
- */
-class GBVIForce::BondInfo {
-public:
-    int particle1, particle2;
-    double bondLength;
-    BondInfo() {
-        bondLength     = 0.0;
-        particle1      = -1;
-        particle2      = -1;
-    }
-    BondInfo(int atomIndex1, int atomIndex2, double bondLength) :
-             particle1(atomIndex1), particle2(atomIndex2), bondLength(bondLength) {
-    }
-};
-} // namespace OpenMM
-#endif /*OPENMM_GBVIFORCEFIELD_H_*/

--- a/openmmapi/src/GBVIForce.cpp
+++ b/openmmapi/src/GBVIForce.cpp
-/* -------------------------------------------------------------------------- *
+Vim: Warning: Output is not to a terminal
- *                                   OpenMM                                   *
+[?1049h[?1h=[1;59r[?12;25h[?12l[?25h[27m[m[H[2J[?25l[59;1H"svn-commit.tmp" 15L, 601C[1;1H[33m  1 
- * -------------------------------------------------------------------------- *
+  2 [m[32m--This line, and those below, will be ignored--[m
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+[33m  3 
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+  4 [m[1m[35mM    plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp[m
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+[33m  5 [m[1m[35mM    plugins/freeEnergy/platforms/reference/src/gbsa/CpuGBVISoftcore.cpp[m
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+[33m  6 [m[1m[35mM    openmmapi/include/openmm/GBVIForce.h[m
- *                                                                            *
+[33m  7 [m[1m[35mM    openmmapi/src/GBVIForce.cpp[m
- * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
+[33m  8 [m[1m[35mM    olla/src/Platform.cpp[m
- * Authors: Peter Eastman                                                     *
+[33m  9 [m[1m[35mM    platforms/opencl/src/OpenCLContext.h[m
- * Contributors:                                                              *
+[33m 10 [m[1m[35mM    platforms/cuda/src/CudaKernels.cpp[m
- *                                                                            *
+[33m 11 [m[1m[35mM    platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu[m
- * Permission is hereby granted, free of charge, to any person obtaining a    *
+[33m 12 [m[1m[35mM    platforms/cuda/src/kernels/gputypes.h[m
- * copy of this software and associated documentation files (the "Software"), *
+[33m 13 [m[1m[35mM    platforms/cuda/src/kernels/cudatypes.h[m
- * to deal in the Software without restriction, including without limitation  *
+[33m 14 [m[1m[35mM    platforms/cuda/src/kernels/kForces.cu[m
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+[33m 15 [m[1m[35mM    platforms/cuda/src/kernels/gpu.cpp[m
- * and/or sell copies of the Software, and to permit persons to whom the      *
+[1m[34m~                                                                                                                                                                                                                                         [17;1H~                                                                                                                                                                                                                                         [18;1H~                                                                                                                                                                                                                                         [19;1H~                                                                                                                                                                                                                                         [20;1H~                                                                                                                                                                                                                                         [21;1H~                                                                                                                                                                                                                                         [22;1H~                                                                                                                                                                                                                                         [23;1H~                                                                                                                                                                                                                                         [24;1H~                                                                       
                                                                                                                                                                  [25;1H~                                                                                                                                                                                                                                         [26;1H~                                                                                                                                                                                                                                         [27;1H~                                                                                                                                                                                                                                         [28;1H~                                                                                                                                                                                                                                         [29;1H~                                                                                                                                                                                                                                         [30;1H~                                                                                                                                                                                                                                         [31;1H~                                                                                                                                                                                                                                         [32;1H~                                                                                                                                                       
                                                                                  [33;1H~                                                                                                                                                                                                                                         [34;1H~                                                                                                                                                                                                                                         [35;1H~                                                                                                                                                                                                                                         [36;1H~                                                                                                                                                                                                                                         [37;1H~                                                                                                                                                                                                                                         [38;1H~                                                                                                                                                                                                                                         [39;1H~                                                                                                                                                                                                                                         [40;1H~                                                                                                                                                                                                                                       
  [41;1H~                                                                                                                                                                                                                                         [42;1H~                                                                                                                                                                                                                                         [43;1H~                                                                                                                                                                                                                                         [44;1H~                                                                                                                                                                                                                                         [45;1H~                                                                                                                                                                                                                                         [46;1H~                                                                                                                                                                                                                                         [47;1H~                                                                                                                                                                                                                                         [48;1H~                                                                                                                                                                                                                                         [49;1H~                                                                       
                                                                                                                                                                  [50;1H~                                                                                                                                                                                                                                         [51;1H~                                                                                                                                                                                                                                         [52;1H~                                                                                                                                                                                                                                         [53;1H~                                                                                                                                                                                                                                         [54;1H~                                                                                                                                                                                                                                         [55;1H~                                                                                                                                                                                                                                         [56;1H~                                                                                                                                                                                                                                         [57;1H~                                                                                                                                                       
                                                                                  [58;1H~                                                                                                                                                                                                                                         [m[59;217H1,0-1[9CAll[1;5H[?12l[?25h[?25l[59;1H[K[59;1H:[?12l[?25hq![?25l[59;1H[K[59;1H[?1l>[?12l[?25h[?1049l
- * Software is furnished to do so, subject to the following conditions:       *
+Log message unchanged or not specified
- *                                                                            *
+a)bort, c)ontinue, e)dit
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-#include "openmm/Force.h"
-#include "openmm/OpenMMException.h"
-#include "openmm/GBVIForce.h"
-#include "openmm/internal/GBVIForceImpl.h"
-#include <sstream>
-using namespace OpenMM;
-GBVIForce::GBVIForce() : nonbondedMethod(NoCutoff), cutoffDistance(1.0), solventDielectric(78.3), soluteDielectric(1.0) {
-}
-int GBVIForce::addParticle(double charge, double radius, double gamma) {
-    particles.push_back(ParticleInfo(charge, radius, gamma));
-    return particles.size()-1;
-}
-void GBVIForce::getParticleParameters(int index, double& charge, double& radius, double& gamma) const {
-    charge = particles[index].charge;
-    radius = particles[index].radius;
-    gamma  = particles[index].gamma;
-}
-void GBVIForce::setParticleParameters(int index, double charge, double radius, double gamma) {
-    particles[index].charge = charge;
-    particles[index].radius = radius;
-    particles[index].gamma  = gamma;
-}
-GBVIForce::NonbondedMethod GBVIForce::getNonbondedMethod() const {
-    return nonbondedMethod;
-}
-void GBVIForce::setNonbondedMethod(NonbondedMethod method) {
-    nonbondedMethod = method;
-}
-double GBVIForce::getCutoffDistance() const {
-    return cutoffDistance;
-}
-void GBVIForce::setCutoffDistance(double distance) {
-    cutoffDistance = distance;
-}
-int GBVIForce::addBond(int particle1, int particle2, double bondLength) {
-    bonds.push_back(BondInfo(particle1, particle2, bondLength));
-    return bonds.size()-1;
-}
-void GBVIForce::setBondParameters( int index, int particle1, int particle2, double bondLength) {
-    bonds[index].particle1  = particle1;
-    bonds[index].particle2  = particle2;
-    bonds[index].bondLength = bondLength;
-}
-int GBVIForce::getNumBonds( void ) const {
-   return (int) bonds.size();
-}
-void GBVIForce::getBondParameters(int index, int& bondIndex1, int& bondIndex2, double& bondLength) const {
-    bondIndex1 = bonds[index].particle1;
-    bondIndex2 = bonds[index].particle2;
-    bondLength = bonds[index].bondLength;
-}
-ForceImpl* GBVIForce::createImpl() {
-    return new GBVIForceImpl(*this);
-}

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
-/* -------------------------------------------------------------------------- *
+Vim: Warning: Output is not to a terminal
- *                                   OpenMM                                   *
+[?1049h[?1h=[1;59r[?12;25h[?12l[?25h[27m[m[H[2J[?25l[59;1H"svn-commit.tmp" 15L, 601C[1;1H[33m  1 
- * -------------------------------------------------------------------------- *
+  2 [m[32m--This line, and those below, will be ignored--[m
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+[33m  3 
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+  4 [m[1m[35mM    plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp[m
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+[33m  5 [m[1m[35mM    plugins/freeEnergy/platforms/reference/src/gbsa/CpuGBVISoftcore.cpp[m
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+[33m  6 [m[1m[35mM    openmmapi/include/openmm/GBVIForce.h[m
- *                                                                            *
+[33m  7 [m[1m[35mM    openmmapi/src/GBVIForce.cpp[m
- * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
+[33m  8 [m[1m[35mM    olla/src/Platform.cpp[m
- * Authors: Peter Eastman                                                     *
+[33m  9 [m[1m[35mM    platforms/opencl/src/OpenCLContext.h[m
- * Contributors:                                                              *
+[33m 10 [m[1m[35mM    platforms/cuda/src/CudaKernels.cpp[m
- *                                                                            *
+[33m 11 [m[1m[35mM    platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu[m
- * This program is free software: you can redistribute it and/or modify       *
+[33m 12 [m[1m[35mM    platforms/cuda/src/kernels/gputypes.h[m
- * it under the terms of the GNU Lesser General Public License as published   *
+[33m 13 [m[1m[35mM    platforms/cuda/src/kernels/cudatypes.h[m
- * by the Free Software Foundation, either version 3 of the License, or       *
+[33m 14 [m[1m[35mM    platforms/cuda/src/kernels/kForces.cu[m
- * (at your option) any later version.                                        *
+[33m 15 [m[1m[35mM    platforms/cuda/src/kernels/gpu.cpp[m
- *                                                                            *
+[1m[34m~                                                                                                                                                                                                                                         [17;1H~                                                                                                                                                                                                                                         [18;1H~                                                                                                                                                                                                                                         [19;1H~                                                                                                                                                                                                                                         [20;1H~                                                                                                                                                                                                                                         [21;1H~                                                                                                                                                                                                                                         [22;1H~                                                                                                                                                                                                                                         [23;1H~                                                                                                                                                                                                                                         [24;1H~                                                                       
                                                                                                                                                                  [25;1H~                                                                                                                                                                                                                                         [26;1H~                                                                                                                                                                                                                                         [27;1H~                                                                                                                                                                                                                                         [28;1H~                                                                                                                                                                                                                                         [29;1H~                                                                                                                                                                                                                                         [30;1H~                                                                                                                                                                                                                                         [31;1H~                                                                                                                                                                                                                                         [32;1H~                                                                                                                                                       
                                                                                  [33;1H~                                                                                                                                                                                                                                         [34;1H~                                                                                                                                                                                                                                         [35;1H~                                                                                                                                                                                                                                         [36;1H~                                                                                                                                                                                                                                         [37;1H~                                                                                                                                                                                                                                         [38;1H~                                                                                                                                                                                                                                         [39;1H~                                                                                                                                                                                                                                         [40;1H~                                                                                                                                                                                                                                       
  [41;1H~                                                                                                                                                                                                                                         [42;1H~                                                                                                                                                                                                                                         [43;1H~                                                                                                                                                                                                                                         [44;1H~                                                                                                                                                                                                                                         [45;1H~                                                                                                                                                                                                                                         [46;1H~                                                                                                                                                                                                                                         [47;1H~                                                                                                                                                                                                                                         [48;1H~                                                                                                                                                                                                                                         [49;1H~                                                                       
                                                                                                                                                                  [50;1H~                                                                                                                                                                                                                                         [51;1H~                                                                                                                                                                                                                                         [52;1H~                                                                                                                                                                                                                                         [53;1H~                                                                                                                                                                                                                                         [54;1H~                                                                                                                                                                                                                                         [55;1H~                                                                                                                                                                                                                                         [56;1H~                                                                                                                                                                                                                                         [57;1H~                                                                                                                                                       
                                                                                  [58;1H~                                                                                                                                                                                                                                         [m[59;217H1,0-1[9CAll[1;5H[?12l[?25h[?25l[59;1H[K[59;1H:[?12l[?25hq![?25l[59;1H[K[59;1H[?1l>[?12l[?25h[?1049l
- * This program is distributed in the hope that it will be useful,            *
+Log message unchanged or not specified
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+a)bort, c)ontinue, e)dit
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include "CudaKernels.h"
-#include "CudaForceInfo.h"
-#include "openmm/LangevinIntegrator.h"
-#include "openmm/Context.h"
-#include "openmm/OpenMMException.h"
-#include "openmm/internal/AndersenThermostatImpl.h"
-#include "openmm/internal/CMAPTorsionForceImpl.h"
-#include "openmm/internal/ContextImpl.h"
-#include "openmm/internal/NonbondedForceImpl.h"
-#include "kernels/gputypes.h"
-#include "kernels/cudaKernels.h"
-#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
-#include <cmath>
-extern "C" int OPENMMCUDA_EXPORT gpuSetConstants( gpuContext gpu );
-using namespace OpenMM;
-using namespace std;
-void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
-}
-void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    _gpuContext* gpu = data.gpu;
-    if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
-        gpuReorderAtoms(gpu);
-    data.computeForceCount++;
-    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
-        kClearBornSumAndForces(gpu);
-    else if (includeForces)
-        kClearForces(gpu);
-    if (includeEnergy)
-        kClearEnergy(gpu);
-}
-double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    _gpuContext* gpu = data.gpu;
-    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI) {
-        gpu->bRecalculateBornRadii = true;
-        kCalculateCDLJObcGbsaForces1(gpu);
-        kReduceObcGbsaBornForces(gpu);
-        if (gpu->bIncludeGBSA ) {
-           kCalculateObcGbsaForces2(gpu);
-        } else {
-           kCalculateGBVIForces2(gpu);
-        }
-    }
-    else if (data.hasNonbonded)
-        kCalculateCDLJForces(gpu);
-    if (data.hasCustomNonbonded)
-        kCalculateCustomNonbondedForces(gpu, data.hasNonbonded);
-    kCalculateLocalForces(gpu);
-    if (includeForces)
-        kReduceForces(gpu);
-    double energy = 0.0;
-    if (includeEnergy) {
-        energy = kReduceEnergy(gpu)+data.ewaldSelfEnergy;
-        if (data.dispersionCoefficient != 0.0)
-            energy += data.dispersionCoefficient/(gpu->sim.periodicBoxSizeX*gpu->sim.periodicBoxSizeY*gpu->sim.periodicBoxSizeZ);
-    }
-    return energy;
-}
-void CudaUpdateStateDataKernel::initialize(const System& system) {
-}
-double CudaUpdateStateDataKernel::getTime(const ContextImpl& context) const {
-    return data.time;
-}
-void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
-    data.time = time;
-}
-void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector<Vec3>& positions) {
-    _gpuContext* gpu = data.gpu;
-    gpu->psPosq4->Download();
-    int* order = gpu->psAtomIndex->_pSysData;
-    int numParticles = context.getSystem().getNumParticles();
-    positions.resize(numParticles);
-    for (int i = 0; i < numParticles; ++i) {
-        float4 pos = (*gpu->psPosq4)[i];
-        int3 offset = gpu->posCellOffsets[i];
-        positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
-    }
-}
-void CudaUpdateStateDataKernel::setPositions(ContextImpl& context, const std::vector<Vec3>& positions) {
-    _gpuContext* gpu = data.gpu;
-    int* order = gpu->psAtomIndex->_pSysData;
-    int numParticles = context.getSystem().getNumParticles();
-    for (int i = 0; i < numParticles; ++i) {
-        float4& pos = (*gpu->psPosq4)[i];
-        const Vec3& p = positions[order[i]];
-        pos.x = (float) p[0];
-        pos.y = (float) p[1];
-        pos.z = (float) p[2];
-    }
-    gpu->psPosq4->Upload();
-    for (int i = 0; i < (int) gpu->posCellOffsets.size(); i++)
-        gpu->posCellOffsets[i] = make_int3(0, 0, 0);
-}
-void CudaUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
-    _gpuContext* gpu = data.gpu;
-    gpu->psVelm4->Download();
-    int* order = gpu->psAtomIndex->_pSysData;
-    int numParticles = context.getSystem().getNumParticles();
-    velocities.resize(numParticles);
-    for (int i = 0; i < numParticles; ++i) {
-        float4 vel = (*gpu->psVelm4)[i];
-        velocities[order[i]] = Vec3(vel.x, vel.y, vel.z);
-    }
-}
-void CudaUpdateStateDataKernel::setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities) {
-    _gpuContext* gpu = data.gpu;
-    int* order = gpu->psAtomIndex->_pSysData;
-    int numParticles = context.getSystem().getNumParticles();
-    for (int i = 0; i < numParticles; ++i) {
-        float4& vel = (*gpu->psVelm4)[i];
-        const Vec3& v = velocities[order[i]];
-        vel.x = (float) v[0];
-        vel.y = (float) v[1];
-        vel.z = (float) v[2];
-    }
-    gpu->psVelm4->Upload();
-}
-void CudaUpdateStateDataKernel::getForces(ContextImpl& context, std::vector<Vec3>& forces) {
-    _gpuContext* gpu = data.gpu;
-    int* order = gpu->psAtomIndex->_pSysData;
-    gpu->psForce4->Download();
-    int numParticles = context.getSystem().getNumParticles();
-    forces.resize(numParticles);
-    for (int i = 0; i < numParticles; ++i) {
-        float4 force = (*gpu->psForce4)[i];
-        forces[order[i]] = Vec3(force.x, force.y, force.z);
-    }
-}
-void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3& a, Vec3& b, Vec3& c) const {
-    _gpuContext* gpu = data.gpu;
-    a = Vec3(gpu->sim.periodicBoxSizeX, 0, 0);
-    b = Vec3(0, gpu->sim.periodicBoxSizeY, 0);
-    c = Vec3(0, 0, gpu->sim.periodicBoxSizeZ);
-}
-void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const {
-    _gpuContext* gpu = data.gpu;
-    gpuSetPeriodicBoxSize(gpu, a[0], b[1], c[2]);
-    gpuSetConstants(gpu);
-}
-void CudaApplyConstraintsKernel::initialize(const System& system) {
-}
-void CudaApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
-    kApplyConstraints(data.gpu);
-}
-class CudaCalcHarmonicBondForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    ForceInfo(const HarmonicBondForce& force) : force(force) {
-    }
-    int getNumParticleGroups() {
-        return force.getNumBonds();
-    }
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int particle1, particle2;
-        double length, k;
-        force.getBondParameters(index, particle1, particle2, length, k);
-        particles.resize(2);
-        particles[0] = particle1;
-        particles[1] = particle2;
-    }
-    bool areGroupsIdentical(int group1, int group2) {
-        int particle1, particle2;
-        double length1, length2, k1, k2;
-        force.getBondParameters(group1, particle1, particle2, length1, k1);
-        force.getBondParameters(group2, particle1, particle2, length2, k2);
-        return (length1 == length2 && k1 == k2);
-    }
-private:
-    const HarmonicBondForce& force;
-};
-CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
-}
-void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
-    data.hasBonds = true;
-    numBonds = force.getNumBonds();
-    vector<int> particle1(numBonds);
-    vector<int> particle2(numBonds);
-    vector<float> length(numBonds);
-    vector<float> k(numBonds);
-    for (int i = 0; i < numBonds; i++) {
-        double lengthValue, kValue;
-        force.getBondParameters(i, particle1[i], particle2[i], lengthValue, kValue);
-        length[i] = (float) lengthValue;
-        k[i] = (float) kValue;
-    }
-    gpuSetBondParameters(data.gpu, particle1, particle2, length, k);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    return 0.0;
-}
-class CudaCalcCustomBondForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    ForceInfo(const CustomBondForce& force) : force(force) {
-    }
-    int getNumParticleGroups() {
-        return force.getNumBonds();
-    }
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int particle1, particle2;
-        vector<double> parameters;
-        force.getBondParameters(index, particle1, particle2, parameters);
-        particles.resize(2);
-        particles[0] = particle1;
-        particles[1] = particle2;
-    }
-    bool areGroupsIdentical(int group1, int group2) {
-        int particle1, particle2;
-        vector<double> parameters1, parameters2;
-        force.getBondParameters(group1, particle1, particle2, parameters1);
-        force.getBondParameters(group2, particle1, particle2, parameters2);
-        for (int i = 0; i < (int) parameters1.size(); i++)
-            if (parameters1[i] != parameters2[i])
-                return false;
-        return true;
-    }
-private:
-    const CustomBondForce& force;
-};
-CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
-}
-void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
-    numBonds = force.getNumBonds();
-    vector<int> particle1(numBonds);
-    vector<int> particle2(numBonds);
-    vector<vector<double> > params(numBonds);
-    for (int i = 0; i < numBonds; i++)
-        force.getBondParameters(i, particle1[i], particle2[i], params[i]);
-    vector<string> paramNames;
-    for (int i = 0; i < force.getNumPerBondParameters(); i++)
-        paramNames.push_back(force.getPerBondParameterName(i));
-    globalParamNames.resize(force.getNumGlobalParameters());
-    globalParamValues.resize(force.getNumGlobalParameters());
-    for (int i = 0; i < force.getNumGlobalParameters(); i++) {
-        globalParamNames[i] = force.getGlobalParameterName(i);
-        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
-    }
-    gpuSetCustomBondParameters(data.gpu, particle1, particle2, params, force.getEnergyFunction(), paramNames, globalParamNames);
-    if (globalParamValues.size() > 0)
-        SetCustomBondGlobalParams(globalParamValues);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    updateGlobalParams(context);
-    kCalculateCustomBondForces(data.gpu);
-    return 0.0;
-}
-void CudaCalcCustomBondForceKernel::updateGlobalParams(ContextImpl& context) {
-    bool changed = false;
-    for (int i = 0; i < (int) globalParamNames.size(); i++) {
-        float value = (float) context.getParameter(globalParamNames[i]);
-        if (value != globalParamValues[i])
-            changed = true;
-        globalParamValues[i] = value;
-    }
-    if (changed)
-        SetCustomBondGlobalParams(globalParamValues);
-}
-class CudaCalcHarmonicAngleForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    ForceInfo(const HarmonicAngleForce& force) : force(force) {
-    }
-    int getNumParticleGroups() {
-        return force.getNumAngles();
-    }
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int particle1, particle2, particle3;
-        double angle, k;
-        force.getAngleParameters(index, particle1, particle2, particle3, angle, k);
-        particles.resize(3);
-        particles[0] = particle1;
-        particles[1] = particle2;
-        particles[2] = particle3;
-    }
-    bool areGroupsIdentical(int group1, int group2) {
-        int particle1, particle2, particle3;
-        double angle1, angle2, k1, k2;
-        force.getAngleParameters(group1, particle1, particle2, particle3, angle1, k1);
-        force.getAngleParameters(group2, particle1, particle2, particle3, angle2, k2);
-        return (angle1 == angle2 && k1 == k2);
-    }
-private:
-    const HarmonicAngleForce& force;
-};
-CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
-}
-void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
-    data.hasAngles = true;
-    numAngles = force.getNumAngles();
-    const float RadiansToDegrees = (float) (180.0/3.14159265);
-    vector<int> particle1(numAngles);
-    vector<int> particle2(numAngles);
-    vector<int> particle3(numAngles);
-    vector<float> angle(numAngles);
-    vector<float> k(numAngles);
-    for (int i = 0; i < numAngles; i++) {
-        double angleValue, kValue;
-        force.getAngleParameters(i, particle1[i], particle2[i], particle3[i], angleValue, kValue);
-        angle[i] = (float) (angleValue*RadiansToDegrees);
-        k[i] = (float) kValue;
-    }
-    gpuSetBondAngleParameters(data.gpu, particle1, particle2, particle3, angle, k);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Does no work here; always reports an energy of zero.
-    return 0.0;
-}
-class CudaCalcCustomAngleForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the CustomAngleForce that the queries below read from.
-    ForceInfo(const CustomAngleForce& force) : force(force) {
-    }
-    // Reports one particle group per angle in the force.
-    int getNumParticleGroups() {
-        return force.getNumAngles();
-    }
-    // Fills `particles` with the three particle indices of angle `index`.
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int members[3];
-        vector<double> unusedParams;
-        force.getAngleParameters(index, members[0], members[1], members[2], unusedParams);
-        particles.assign(members, members+3);
-    }
-    // Two angles are identical when every per-angle parameter of the first
-    // compares equal to the parameter at the same position in the second.
-    bool areGroupsIdentical(int group1, int group2) {
-        int p1, p2, p3;
-        vector<double> first, second;
-        force.getAngleParameters(group1, p1, p2, p3, first);
-        force.getAngleParameters(group2, p1, p2, p3, second);
-        const int count = (int) first.size();
-        int pos = 0;
-        while (pos < count && first[pos] == second[pos])
-            ++pos;
-        return pos == count;
-    }
-private:
-    const CustomAngleForce& force;
-};
-CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
-    // Per-angle data: particle indices plus that angle's parameter vector.
-    numAngles = force.getNumAngles();
-    vector<int> atomA(numAngles), atomB(numAngles), atomC(numAngles);
-    vector<vector<double> > perAngleValues(numAngles);
-    for (int angleIndex = 0; angleIndex < numAngles; ++angleIndex)
-        force.getAngleParameters(angleIndex, atomA[angleIndex], atomB[angleIndex], atomC[angleIndex], perAngleValues[angleIndex]);
-    // Names of the per-angle parameters, in index order.
-    vector<string> perAngleNames;
-    const int numPerAngle = force.getNumPerAngleParameters();
-    for (int p = 0; p < numPerAngle; ++p)
-        perAngleNames.push_back(force.getPerAngleParameterName(p));
-    // Global parameters: remember each name and its default value (as float).
-    const int numGlobals = force.getNumGlobalParameters();
-    globalParamNames.resize(numGlobals);
-    globalParamValues.resize(numGlobals);
-    for (int g = 0; g < numGlobals; ++g) {
-        globalParamNames[g] = force.getGlobalParameterName(g);
-        globalParamValues[g] = (float) force.getGlobalParameterDefaultValue(g);
-    }
-    gpuSetCustomAngleParameters(data.gpu, atomA, atomB, atomC, perAngleValues, force.getEnergyFunction(), perAngleNames, globalParamNames);
-    // The global values are only pushed when there is at least one of them.
-    if (!globalParamValues.empty())
-        SetCustomAngleGlobalParams(globalParamValues);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Sync the global parameters with the context, then run the custom angle
-    // computation. The returned energy is always zero.
-    updateGlobalParams(context);
-    kCalculateCustomAngleForces(data.gpu);
-    return 0.0;
-}
-void CudaCalcCustomAngleForceKernel::updateGlobalParams(ContextImpl& context) {
-    // Re-read every global parameter from the context (as float), cache it, and
-    // call SetCustomAngleGlobalParams only if at least one value differs from
-    // the previously cached one.
-    const int numGlobals = (int) globalParamNames.size();
-    bool anyChanged = false;
-    for (int g = 0; g < numGlobals; ++g) {
-        const float current = (float) context.getParameter(globalParamNames[g]);
-        anyChanged = anyChanged || (current != globalParamValues[g]);
-        globalParamValues[g] = current;
-    }
-    if (!anyChanged)
-        return;
-    SetCustomAngleGlobalParams(globalParamValues);
-}
-class CudaCalcPeriodicTorsionForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the PeriodicTorsionForce that the queries below read from.
-    ForceInfo(const PeriodicTorsionForce& force) : force(force) {
-    }
-    // Reports one particle group per torsion in the force.
-    int getNumParticleGroups() {
-        return force.getNumTorsions();
-    }
-    // Fills `particles` with the four particle indices of torsion `index`.
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int members[4];
-        int periodicity;
-        double phase, k;
-        force.getTorsionParameters(index, members[0], members[1], members[2], members[3], periodicity, phase, k);
-        particles.assign(members, members+4);
-    }
-    // Two torsions are identical when periodicity, phase and k all compare equal;
-    // the particle indices are fetched but not compared.
-    bool areGroupsIdentical(int group1, int group2) {
-        int p1, p2, p3, p4;
-        int periodicityA, periodicityB;
-        double phaseA, phaseB, kA, kB;
-        force.getTorsionParameters(group1, p1, p2, p3, p4, periodicityA, phaseA, kA);
-        force.getTorsionParameters(group2, p1, p2, p3, p4, periodicityB, phaseB, kB);
-        if (periodicityA != periodicityB)
-            return false;
-        if (phaseA != phaseB)
-            return false;
-        return kA == kB;
-    }
-private:
-    const PeriodicTorsionForce& force;
-};
-CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
-    // Copies every torsion's particle indices, periodicity, phase and k into flat
-    // arrays (the phase is multiplied by RadiansToDegrees; phase and k are narrowed
-    // to float), hands them to gpuSetDihedralParameters and registers a ForceInfo.
-    data.hasPeriodicTorsions = true;
-    numTorsions = force.getNumTorsions();
-    const float RadiansToDegrees = (float)(180.0/3.14159265);
-    vector<int> atomA(numTorsions), atomB(numTorsions), atomC(numTorsions), atomD(numTorsions);
-    vector<float> forceConstant(numTorsions);
-    vector<float> phaseOffset(numTorsions);
-    vector<int> multiplicity(numTorsions);
-    for (int t = 0; t < numTorsions; ++t) {
-        double rawK, rawPhase;
-        force.getTorsionParameters(t, atomA[t], atomB[t], atomC[t], atomD[t], multiplicity[t], rawPhase, rawK);
-        forceConstant[t] = (float) rawK;
-        phaseOffset[t] = (float) (rawPhase*RadiansToDegrees);
-    }
-    gpuSetDihedralParameters(data.gpu, atomA, atomB, atomC, atomD, forceConstant, phaseOffset, multiplicity);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Does no work here; always reports an energy of zero.
-    return 0.0;
-}
-class CudaCalcRBTorsionForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the RBTorsionForce that the queries below read from.
-    ForceInfo(const RBTorsionForce& force) : force(force) {
-    }
-    // Reports one particle group per torsion in the force.
-    int getNumParticleGroups() {
-        return force.getNumTorsions();
-    }
-    // Fills `particles` with the four particle indices of torsion `index`.
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int members[4];
-        double coeff[6];
-        force.getTorsionParameters(index, members[0], members[1], members[2], members[3],
-                                   coeff[0], coeff[1], coeff[2], coeff[3], coeff[4], coeff[5]);
-        particles.assign(members, members+4);
-    }
-    // Two torsions are identical when all six coefficients compare equal;
-    // the particle indices are fetched but not compared.
-    bool areGroupsIdentical(int group1, int group2) {
-        int p1, p2, p3, p4;
-        double first[6], second[6];
-        force.getTorsionParameters(group1, p1, p2, p3, p4, first[0], first[1], first[2], first[3], first[4], first[5]);
-        force.getTorsionParameters(group2, p1, p2, p3, p4, second[0], second[1], second[2], second[3], second[4], second[5]);
-        for (int c = 0; c < 6; ++c)
-            if (!(first[c] == second[c]))
-                return false;
-        return true;
-    }
-private:
-    const RBTorsionForce& force;
-};
-CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
-    // Copies every torsion's particle indices and six coefficients (narrowed to
-    // float) into flat arrays, hands them to gpuSetRbDihedralParameters and
-    // registers a ForceInfo.
-    data.hasRB = true;
-    numTorsions = force.getNumTorsions();
-    vector<int> atomA(numTorsions), atomB(numTorsions), atomC(numTorsions), atomD(numTorsions);
-    // coeff[n][t] holds coefficient n of torsion t.
-    vector<vector<float> > coeff(6, vector<float>(numTorsions));
-    for (int t = 0; t < numTorsions; ++t) {
-        double raw[6];
-        force.getTorsionParameters(t, atomA[t], atomB[t], atomC[t], atomD[t], raw[0], raw[1], raw[2], raw[3], raw[4], raw[5]);
-        for (int n = 0; n < 6; ++n)
-            coeff[n][t] = (float) raw[n];
-    }
-    gpuSetRbDihedralParameters(data.gpu, atomA, atomB, atomC, atomD, coeff[0], coeff[1], coeff[2], coeff[3], coeff[4], coeff[5]);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Does no work here; always reports an energy of zero.
-    return 0.0;
-}
-class CudaCalcCMAPTorsionForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the CMAPTorsionForce that the queries below read from.
-    ForceInfo(const CMAPTorsionForce& force) : force(force) {
-    }
-    // Reports one particle group per CMAP torsion in the force.
-    int getNumParticleGroups() {
-        return force.getNumTorsions();
-    }
-    // Fills `particles` with the eight particle indices of torsion `index`:
-    // a1..a4 followed by b1..b4.
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int mapIndex;
-        int members[8];
-        force.getTorsionParameters(index, mapIndex, members[0], members[1], members[2], members[3],
-                                   members[4], members[5], members[6], members[7]);
-        particles.assign(members, members+8);
-    }
-    // Two torsions are identical when they reference the same map index;
-    // the particle indices are fetched but not compared.
-    bool areGroupsIdentical(int group1, int group2) {
-        int mapA, mapB;
-        int a1, a2, a3, a4, b1, b2, b3, b4;
-        force.getTorsionParameters(group1, mapA, a1, a2, a3, a4, b1, b2, b3, b4);
-        force.getTorsionParameters(group2, mapB, a1, a2, a3, a4, b1, b2, b3, b4);
-        return mapA == mapB;
-    }
-private:
-    const CMAPTorsionForce& force;
-};
-CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
-    // Release the four streams owned by this kernel. Applying delete to a null
-    // pointer has no effect, so streams that were never allocated need no guard.
-    delete coefficients;
-    delete mapPositions;
-    delete torsionMaps;
-    delete torsionIndices;
-}
-void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
-    // Builds and uploads the four streams used by the CMAP torsion computation:
-    // packed map coefficients, per-map (offset, size) positions, per-torsion map
-    // index, and per-torsion particle / force-buffer indices. Nothing is allocated
-    // or registered when the force contains no torsions.
-    numTorsions = force.getNumTorsions();
-    if (numTorsions == 0)
-        return;
-    int numMaps = force.getNumMaps();
-    vector<float4> coeffVec;
-    vector<double> energy;
-    vector<vector<double> > c;
-    int currentPosition = 0;
-    mapPositions = new CUDAStream<int2>(numMaps, 1, "cmapTorsionMapPositions");
-    for (int i = 0; i < numMaps; i++) {
-        int size;
-        force.getMapParameters(i, size, energy);
-        CMAPTorsionForceImpl::calcMapDerivatives(size, energy, c);
-        // Record where this map starts in the packed coefficient array; each of the
-        // size*size entries contributes four float4 values (16 coefficients).
-        (*mapPositions)[i] = make_int2(currentPosition, size);
-        currentPosition += 4*size*size;
-        for (int j = 0; j < size*size; j++) {
-            const vector<double>& entry = c[j];
-            for (int k = 0; k < 16; k += 4)
-                coeffVec.push_back(make_float4(entry[k], entry[k+1], entry[k+2], entry[k+3]));
-        }
-    }
-    coefficients = new CUDAStream<float4>((int) coeffVec.size(), 1, "cmapTorsionCoefficients");
-    for (int i = 0; i < (int) coeffVec.size(); i++)
-        (*coefficients)[i] = coeffVec[i];
-    torsionMaps = new CUDAStream<int>(numTorsions, 1, "cmapTorsionMaps");
-    torsionIndices = new CUDAStream<int4>(4*numTorsions, 1, "cmapTorsionIndices");
-    // forceBufferCounter[p] counts how many buffer slots have been handed out to
-    // particle p so far; each torsion takes the next free slot for each of its particles.
-    vector<int> forceBufferCounter(system.getNumParticles(), 0);
-    for (int i = 0; i < numTorsions; i++) {
-        int map, a1, a2, a3, a4, b1, b2, b3, b4;
-        force.getTorsionParameters(i, map, a1, a2, a3, a4, b1, b2, b3, b4);
-        (*torsionMaps)[i] = map;
-        (*torsionIndices)[i*4] = make_int4(a1, a2, a3, a4);
-        (*torsionIndices)[i*4+1] = make_int4(b1, b2, b3, b4);
-        // Take the slots one at a time, in a fixed a1..a4, b1..b4 order, instead of
-        // post-incrementing several counters inside a single call's argument list:
-        // that would be unsequenced if two of the indices ever referred to the same particle.
-        const int atoms[8] = {a1, a2, a3, a4, b1, b2, b3, b4};
-        int slot[8];
-        for (int s = 0; s < 8; s++)
-            slot[s] = forceBufferCounter[atoms[s]]++;
-        (*torsionIndices)[i*4+2] = make_int4(slot[0], slot[1], slot[2], slot[3]);
-        (*torsionIndices)[i*4+3] = make_int4(slot[4], slot[5], slot[6], slot[7]);
-    }
-    coefficients->Upload();
-    mapPositions->Upload();
-    torsionMaps->Upload();
-    torsionIndices->Upload();
-    // Grow the shared output buffer count if any particle needs more slots than
-    // are currently configured.
-    int maxBuffers = 1;
-    for (int i = 0; i < (int) forceBufferCounter.size(); i++)
-        maxBuffers = max(maxBuffers, forceBufferCounter[i]);
-    if (maxBuffers > data.gpu->sim.outputBuffers)
-        data.gpu->sim.outputBuffers = maxBuffers;
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // The CMAP computation is only run when there is at least one torsion.
-    // The returned energy is always zero.
-    if (numTorsions != 0) {
-        kCalculateCMAPTorsionForces(data.gpu, *coefficients, *mapPositions, *torsionIndices, *torsionMaps);
-    }
-    return 0.0;
-}
-class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the CustomTorsionForce that the queries below read from.
-    ForceInfo(const CustomTorsionForce& force) : force(force) {
-    }
-    // Reports one particle group per torsion in the force.
-    int getNumParticleGroups() {
-        return force.getNumTorsions();
-    }
-    // Fills `particles` with the four particle indices of torsion `index`.
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int members[4];
-        vector<double> unusedParams;
-        force.getTorsionParameters(index, members[0], members[1], members[2], members[3], unusedParams);
-        particles.assign(members, members+4);
-    }
-    // Two torsions are identical when every per-torsion parameter of the first
-    // compares equal to the parameter at the same position in the second.
-    bool areGroupsIdentical(int group1, int group2) {
-        int p1, p2, p3, p4;
-        vector<double> first, second;
-        force.getTorsionParameters(group1, p1, p2, p3, p4, first);
-        force.getTorsionParameters(group2, p1, p2, p3, p4, second);
-        const int count = (int) first.size();
-        int pos = 0;
-        while (pos < count && first[pos] == second[pos])
-            ++pos;
-        return pos == count;
-    }
-private:
-    const CustomTorsionForce& force;
-};
-CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
-    // Per-torsion data: particle indices plus that torsion's parameter vector.
-    numTorsions = force.getNumTorsions();
-    vector<int> atomA(numTorsions), atomB(numTorsions), atomC(numTorsions), atomD(numTorsions);
-    vector<vector<double> > perTorsionValues(numTorsions);
-    for (int t = 0; t < numTorsions; ++t)
-        force.getTorsionParameters(t, atomA[t], atomB[t], atomC[t], atomD[t], perTorsionValues[t]);
-    // Names of the per-torsion parameters, in index order.
-    vector<string> perTorsionNames;
-    const int numPerTorsion = force.getNumPerTorsionParameters();
-    for (int p = 0; p < numPerTorsion; ++p)
-        perTorsionNames.push_back(force.getPerTorsionParameterName(p));
-    // Global parameters: remember each name and its default value (as float).
-    const int numGlobals = force.getNumGlobalParameters();
-    globalParamNames.resize(numGlobals);
-    globalParamValues.resize(numGlobals);
-    for (int g = 0; g < numGlobals; ++g) {
-        globalParamNames[g] = force.getGlobalParameterName(g);
-        globalParamValues[g] = (float) force.getGlobalParameterDefaultValue(g);
-    }
-    gpuSetCustomTorsionParameters(data.gpu, atomA, atomB, atomC, atomD, perTorsionValues, force.getEnergyFunction(), perTorsionNames, globalParamNames);
-    // The global values are only pushed when there is at least one of them.
-    if (!globalParamValues.empty())
-        SetCustomTorsionGlobalParams(globalParamValues);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Sync the global parameters with the context, then run the custom torsion
-    // computation. The returned energy is always zero.
-    updateGlobalParams(context);
-    kCalculateCustomTorsionForces(data.gpu);
-    return 0.0;
-}
-void CudaCalcCustomTorsionForceKernel::updateGlobalParams(ContextImpl& context) {
-    // Re-read every global parameter from the context (as float), cache it, and
-    // call SetCustomTorsionGlobalParams only if at least one value differs from
-    // the previously cached one.
-    const int numGlobals = (int) globalParamNames.size();
-    bool anyChanged = false;
-    for (int g = 0; g < numGlobals; ++g) {
-        const float current = (float) context.getParameter(globalParamNames[g]);
-        anyChanged = anyChanged || (current != globalParamValues[g]);
-        globalParamValues[g] = current;
-    }
-    if (!anyChanged)
-        return;
-    SetCustomTorsionGlobalParams(globalParamValues);
-}
-class CudaCalcNonbondedForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the NonbondedForce that the queries below read from.
-    ForceInfo(const NonbondedForce& force) : force(force) {
-    }
-    // Two particles are identical when charge, sigma and epsilon all compare equal.
-    bool areParticlesIdentical(int particle1, int particle2) {
-        double chargeA, sigmaA, epsilonA;
-        double chargeB, sigmaB, epsilonB;
-        force.getParticleParameters(particle1, chargeA, sigmaA, epsilonA);
-        force.getParticleParameters(particle2, chargeB, sigmaB, epsilonB);
-        if (chargeA != chargeB)
-            return false;
-        if (sigmaA != sigmaB)
-            return false;
-        return epsilonA == epsilonB;
-    }
-    // Reports one particle group per exception in the force.
-    int getNumParticleGroups() {
-        return force.getNumExceptions();
-    }
-    // Fills `particles` with the two particle indices of exception `index`.
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int members[2];
-        double chargeProd, sigma, epsilon;
-        force.getExceptionParameters(index, members[0], members[1], chargeProd, sigma, epsilon);
-        particles.assign(members, members+2);
-    }
-    // Two exceptions are identical when charge product, sigma and epsilon all
-    // compare equal; the particle indices are fetched but not compared.
-    bool areGroupsIdentical(int group1, int group2) {
-        int p1, p2;
-        double chargeProdA, sigmaA, epsilonA;
-        double chargeProdB, sigmaB, epsilonB;
-        force.getExceptionParameters(group1, p1, p2, chargeProdA, sigmaA, epsilonA);
-        force.getExceptionParameters(group2, p1, p2, chargeProdB, sigmaB, epsilonB);
-        if (chargeProdA != chargeProdB)
-            return false;
-        if (sigmaA != sigmaB)
-            return false;
-        return epsilonA == epsilonB;
-    }
-private:
-    const NonbondedForce& force;
-};
-CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
-    data.hasNonbonded = true;
-    numParticles = force.getNumParticles();
-    _gpuContext* gpu = data.gpu;
-
-    // Every exception becomes an excluded pair. Exceptions whose charge product or
-    // epsilon is nonzero are additionally remembered by index for the 1-4 setup below.
-    vector<pair<int, int> > excludedPairs;
-    vector<int> exceptions14;
-    const int totalExceptions = force.getNumExceptions();
-    for (int e = 0; e < totalExceptions; ++e) {
-        int first, second;
-        double chargeProd, sigma, epsilon;
-        force.getExceptionParameters(e, first, second, chargeProd, sigma, epsilon);
-        excludedPairs.push_back(pair<int, int>(first, second));
-        if (!(chargeProd == 0.0 && epsilon == 0.0))
-            exceptions14.push_back(e);
-    }
-
-    // Per-particle nonbonded parameters, cutoff / Ewald / PME configuration,
-    // Ewald self energy and dispersion correction.
-    {
-        vector<int> particleIndex(numParticles);
-        vector<float> particleC6(numParticles);
-        vector<float> particleC12(numParticles);
-        vector<float> particleCharge(numParticles);
-        vector<char> symbol;
-        vector<vector<int> > exclusionList(numParticles);
-        for (int p = 0; p < numParticles; ++p) {
-            double charge, radius, depth;
-            force.getParticleParameters(p, charge, radius, depth);
-            particleIndex[p] = p;
-            particleCharge[p] = (float) charge;
-            particleC6[p] = (float) (4*depth*pow(radius, 6.0));
-            particleC12[p] = (float) (4*depth*pow(radius, 12.0));
-            // Each particle's list starts with the particle itself.
-            exclusionList[p].push_back(p);
-        }
-        const int numExcluded = (int) excludedPairs.size();
-        for (int x = 0; x < numExcluded; ++x) {
-            const pair<int, int>& excluded = excludedPairs[x];
-            exclusionList[excluded.first].push_back(excluded.second);
-            exclusionList[excluded.second].push_back(excluded.first);
-        }
-
-        const bool usesEwald = (force.getNonbondedMethod() == NonbondedForce::Ewald);
-        const bool usesPME = (force.getNonbondedMethod() == NonbondedForce::PME);
-        CudaNonbondedMethod method = NO_CUTOFF;
-        // Any method other than NoCutoff configures the cutoff first; the more
-        // specific methods below then override `method`.
-        if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
-            gpuSetNonbondedCutoff(gpu, (float) force.getCutoffDistance(), (float) force.getReactionFieldDielectric());
-            method = CUTOFF;
-        }
-        if (force.getNonbondedMethod() == NonbondedForce::CutoffPeriodic)
-            method = PERIODIC;
-        if (usesEwald) {
-            double alpha;
-            int kmaxx, kmaxy, kmaxz;
-            NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
-            gpuSetEwaldParameters(gpu, (float) alpha, kmaxx, kmaxy, kmaxz);
-            method = EWALD;
-        }
-        else if (usesPME) {
-            double alpha;
-            int gridSizeX, gridSizeY, gridSizeZ;
-            NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
-            gpuSetPMEParameters(gpu, (float) alpha, gridSizeX, gridSizeY, gridSizeZ);
-            method = PARTICLE_MESH_EWALD;
-        }
-        data.nonbondedMethod = method;
-        gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, particleIndex, particleC6, particleC12, particleCharge, symbol, exclusionList, method);
-
-        // Ewald self energy: zero unless Ewald or PME is in use, otherwise the
-        // negated sum of selfEnergyScale*q*q over all particles.
-        data.ewaldSelfEnergy = 0.0;
-        if (usesEwald || usesPME) {
-            double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
-            for (int p = 0; p < numParticles; ++p)
-                data.ewaldSelfEnergy -= selfEnergyScale*particleCharge[p]*particleCharge[p];
-        }
-
-        // Long range dispersion correction, or zero when it is disabled.
-        if (!force.getUseDispersionCorrection())
-            data.dispersionCoefficient = 0.0;
-        else
-            data.dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
-    }
-
-    // 1-4 interactions: one entry per exception recorded above. The whole charge
-    // product is stored in q1, with q2 fixed at 1.
-    {
-        const int count14 = (int) exceptions14.size();
-        vector<int> first14(count14);
-        vector<int> second14(count14);
-        vector<float> c6For14(count14);
-        vector<float> c12For14(count14);
-        vector<float> q1(count14);
-        vector<float> q2(count14);
-        for (int n = 0; n < count14; ++n) {
-            double charge, sig, eps;
-            force.getExceptionParameters(exceptions14[n], first14[n], second14[n], charge, sig, eps);
-            c6For14[n] = (float) (4*eps*pow(sig, 6.0));
-            c12For14[n] = (float) (4*eps*pow(sig, 12.0));
-            q1[n] = (float) charge;
-            q2[n] = 1.0f;
-        }
-        gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, first14, second14, c6For14, c12For14, q1, q2);
-    }
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Does no work here; always reports an energy of zero.
-    return 0.0;
-}
-class CudaCalcCustomNonbondedForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the CustomNonbondedForce that the queries below read from.
-    ForceInfo(const CustomNonbondedForce& force) : force(force) {
-    }
-    // Two particles are identical when every per-particle parameter of the first
-    // compares equal to the parameter at the same position in the second.
-    bool areParticlesIdentical(int particle1, int particle2) {
-        vector<double> first;
-        vector<double> second;
-        force.getParticleParameters(particle1, first);
-        force.getParticleParameters(particle2, second);
-        const int count = (int) first.size();
-        int pos = 0;
-        while (pos < count && first[pos] == second[pos])
-            ++pos;
-        return pos == count;
-    }
-    // Reports one particle group per exclusion in the force.
-    int getNumParticleGroups() {
-        return force.getNumExclusions();
-    }
-    // Fills `particles` with the two particle indices of exclusion `index`.
-    void getParticlesInGroup(int index, std::vector<int>& particles) {
-        int members[2];
-        force.getExclusionParticles(index, members[0], members[1]);
-        particles.assign(members, members+2);
-    }
-    // Every pair of exclusions is reported as identical.
-    bool areGroupsIdentical(int group1, int group2) {
-        return true;
-    }
-private:
-    const CustomNonbondedForce& force;
-};
-CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const CustomNonbondedForce& force) {
-    data.hasCustomNonbonded = true;
-    numParticles = force.getNumParticles();
-    _gpuContext* gpu = data.gpu;
-
-    // Per-particle parameter vectors; each particle's exclusion list starts with
-    // the particle itself.
-    vector<int> particleIndex(numParticles);
-    vector<vector<double> > perParticleValues(numParticles);
-    vector<vector<int> > exclusionList(numParticles);
-    for (int p = 0; p < numParticles; ++p) {
-        force.getParticleParameters(p, perParticleValues[p]);
-        particleIndex[p] = p;
-        exclusionList[p].push_back(p);
-    }
-    // Exclusions are recorded symmetrically on both particles.
-    const int numExclusions = force.getNumExclusions();
-    for (int x = 0; x < numExclusions; ++x) {
-        int first, second;
-        force.getExclusionParticles(x, first, second);
-        exclusionList[first].push_back(second);
-        exclusionList[second].push_back(first);
-    }
-
-    // Map the force's nonbonded method: anything other than NoCutoff is CUTOFF,
-    // and CutoffPeriodic is further refined to PERIODIC.
-    CudaNonbondedMethod method = NO_CUTOFF;
-    if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
-        method = CUTOFF;
-    if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
-        method = PERIODIC;
-    data.customNonbondedMethod = method;
-
-    // Record the tabulated functions.
-    const int numFunctions = force.getNumFunctions();
-    for (int f = 0; f < numFunctions; ++f) {
-        string functionName;
-        vector<double> samples;
-        double lower, upper;
-        force.getFunctionParameters(f, functionName, samples, lower, upper);
-        gpuSetTabulatedFunction(gpu, f, functionName, samples, lower, upper);
-    }
-
-    // Record information for the expressions: per-particle parameter names, then
-    // each global parameter's name and default value (as float).
-    vector<string> perParticleNames;
-    const int numPerParticle = force.getNumPerParticleParameters();
-    for (int p = 0; p < numPerParticle; ++p)
-        perParticleNames.push_back(force.getPerParticleParameterName(p));
-    const int numGlobals = force.getNumGlobalParameters();
-    globalParamNames.resize(numGlobals);
-    globalParamValues.resize(numGlobals);
-    for (int g = 0; g < numGlobals; ++g) {
-        globalParamNames[g] = force.getGlobalParameterName(g);
-        globalParamValues[g] = (float) force.getGlobalParameterDefaultValue(g);
-    }
-    gpuSetCustomNonbondedParameters(gpu, perParticleValues, exclusionList, method, (float) force.getCutoffDistance(), force.getEnergyFunction(), perParticleNames, globalParamNames);
-    // The global values are only pushed when there is at least one of them.
-    if (!globalParamValues.empty())
-        SetCustomNonbondedGlobalParams(globalParamValues);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Only syncs the global parameters with the context; no computation is
-    // started here and the returned energy is always zero.
-    updateGlobalParams(context);
-    return 0.0;
-}
-void CudaCalcCustomNonbondedForceKernel::updateGlobalParams(ContextImpl& context) {
-    // Re-read every global parameter from the context (as float), cache it, and
-    // call SetCustomNonbondedGlobalParams only if at least one value differs from
-    // the previously cached one.
-    const int numGlobals = (int) globalParamNames.size();
-    bool anyChanged = false;
-    for (int g = 0; g < numGlobals; ++g) {
-        const float current = (float) context.getParameter(globalParamNames[g]);
-        anyChanged = anyChanged || (current != globalParamValues[g]);
-        globalParamValues[g] = current;
-    }
-    if (!anyChanged)
-        return;
-    SetCustomNonbondedGlobalParams(globalParamValues);
-}
-class CudaCalcGBSAOBCForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the GBSAOBCForce that the query below reads from.
-    ForceInfo(const GBSAOBCForce& force) : force(force) {
-    }
-    // Two particles are identical when charge, radius and scale all compare equal.
-    bool areParticlesIdentical(int particle1, int particle2) {
-        double chargeA, radiusA, scaleA;
-        double chargeB, radiusB, scaleB;
-        force.getParticleParameters(particle1, chargeA, radiusA, scaleA);
-        force.getParticleParameters(particle2, chargeB, radiusB, scaleB);
-        if (chargeA != chargeB)
-            return false;
-        if (radiusA != radiusB)
-            return false;
-        return scaleA == scaleB;
-    }
-private:
-    const GBSAOBCForce& force;
-};
-CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
-    // Copies each particle's radius, scaling factor and charge (narrowed to float)
-    // into flat arrays, passes them with the two dielectric constants to
-    // gpuSetObcParameters and registers a ForceInfo. The particle count is taken
-    // from the System, not from the force.
-    const int particleCount = system.getNumParticles();
-    _gpuContext* gpu = data.gpu;
-    vector<float> radii(particleCount), scales(particleCount), charges(particleCount);
-    for (int p = 0; p < particleCount; ++p) {
-        double q, r, s;
-        force.getParticleParameters(p, q, r, s);
-        radii[p] = (float) r;
-        scales[p] = (float) s;
-        charges[p] = (float) q;
-    }
-    gpuSetObcParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), radii, scales, charges);
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Does no work here; always reports an energy of zero.
-    return 0.0;
-}
-class CudaCalcGBVIForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Stores a reference to the GBVIForce that the query below reads from.
-    ForceInfo(const GBVIForce& force) : force(force) {
-    }
-    // Two particles are identical when charge, radius and gamma all compare equal.
-    bool areParticlesIdentical(int particle1, int particle2) {
-        double chargeA, radiusA, gammaA;
-        double chargeB, radiusB, gammaB;
-        force.getParticleParameters(particle1, chargeA, radiusA, gammaA);
-        force.getParticleParameters(particle2, chargeB, radiusB, gammaB);
-        if (chargeA != chargeB)
-            return false;
-        if (radiusA != radiusB)
-            return false;
-        return gammaA == gammaB;
-    }
-private:
-    const GBVIForce& force;
-};
-CudaCalcGBVIForceKernel::~CudaCalcGBVIForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcGBVIForceKernel::initialize(const System& system, const GBVIForce& force, const std::vector<double> & inputScaledRadii) {
-    // Copies each particle's radius and gamma from the force, and its scaled radius
-    // from inputScaledRadii (all narrowed to float), into flat arrays; passes them
-    // with the two dielectric constants to gpuSetGBVIParameters and registers a
-    // ForceInfo. The particle count is taken from the System.
-    // Throws std::out_of_range if inputScaledRadii has fewer entries than the
-    // System has particles; nothing has been sent to the GPU at that point.
-    int numParticles = system.getNumParticles();
-    _gpuContext* gpu = data.gpu;
-    vector<int> particle(numParticles);
-    vector<float> radius(numParticles);
-    vector<float> scaledRadii(numParticles);
-    vector<float> gammas(numParticles);
-    for (int i = 0; i < numParticles; i++) {
-        double charge, particleRadius, gamma;
-        force.getParticleParameters(i, charge, particleRadius, gamma );
-        particle[i]                  = i;
-        radius[i]                    = (float) particleRadius;
-        gammas[i]                    = (float) gamma;
-        // at() instead of operator[]: the vector is caller-supplied while the loop
-        // bound comes from the System, so a short vector must not be read past its end.
-        scaledRadii[i]               = (float) inputScaledRadii.at(i);
-    }
-    gpuSetGBVIParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), particle,
-                         radius, gammas, scaledRadii );
-    data.gpu->forces.push_back(new ForceInfo(force));
-}
-double CudaCalcGBVIForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    // Does no work here; always reports an energy of zero.
-    return 0.0;
-}
-class CudaCalcCustomExternalForceKernel::ForceInfo : public CudaForceInfo {
-public:
-    // Builds a lookup from system particle index to the index of that particle's
-    // entry in the force; particles without an entry keep the value -1.
-    ForceInfo(const CustomExternalForce& force, int numParticles) : force(force), indices(numParticles, -1) {
-        vector<double> scratch;
-        const int numEntries = force.getNumParticles();
-        for (int entry = 0; entry < numEntries; ++entry) {
-            int owner;
-            force.getParticleParameters(entry, owner, scratch);
-            indices[owner] = entry;
-        }
-    }
-    // Particles are identical when neither has an entry in the force, or when both
-    // have one and every parameter of the first compares equal to the parameter at
-    // the same position in the second.
-    bool areParticlesIdentical(int particle1, int particle2) {
-        const int entryA = indices[particle1];
-        const int entryB = indices[particle2];
-        const bool missingA = (entryA == -1);
-        const bool missingB = (entryB == -1);
-        if (missingA && missingB)
-            return true;
-        if (missingA || missingB)
-            return false;
-        int ignoredParticle;
-        vector<double> first;
-        vector<double> second;
-        force.getParticleParameters(entryA, ignoredParticle, first);
-        force.getParticleParameters(entryB, ignoredParticle, second);
-        const int count = (int) first.size();
-        int pos = 0;
-        while (pos < count && first[pos] == second[pos])
-            ++pos;
-        return pos == count;
-    }
-private:
-    const CustomExternalForce& force;
-    vector<int> indices;
-};
-CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
-    // Intentionally empty: this destructor performs no cleanup of its own.
-}
-void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
-    // Per-entry data: the particle each entry applies to plus its parameter vector.
-    // Here numParticles is the number of entries in the force, not in the System.
-    numParticles = force.getNumParticles();
-    vector<int> targetParticle(numParticles);
-    vector<vector<double> > perParticleValues(numParticles);
-    for (int entry = 0; entry < numParticles; ++entry)
-        force.getParticleParameters(entry, targetParticle[entry], perParticleValues[entry]);
-    // Names of the per-particle parameters, in index order.
-    vector<string> perParticleNames;
-    const int numPerParticle = force.getNumPerParticleParameters();
-    for (int p = 0; p < numPerParticle; ++p)
-        perParticleNames.push_back(force.getPerParticleParameterName(p));
-    // Global parameters: remember each name and its default value (as float).
-    const int numGlobals = force.getNumGlobalParameters();
-    globalParamNames.resize(numGlobals);
-    globalParamValues.resize(numGlobals);
-    for (int g = 0; g < numGlobals; ++g) {
-        globalParamNames[g] = force.getGlobalParameterName(g);
-        globalParamValues[g] = (float) force.getGlobalParameterDefaultValue(g);
-    }
-    gpuSetCustomExternalParameters(data.gpu, targetParticle, perParticleValues, force.getEnergyFunction(), perParticleNames, globalParamNames);
-    // The global values are only pushed when there is at least one of them.
-    if (!globalParamValues.empty())
-        SetCustomExternalGlobalParams(globalParamValues);
-    // The ForceInfo is sized by the System's particle count.
-    data.gpu->forces.push_back(new ForceInfo(force, system.getNumParticles()));
-}
-double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    updateGlobalParams(context);
-    kCalculateCustomExternalForces(data.gpu);
-    return 0.0;
-}
-void CudaCalcCustomExternalForceKernel::updateGlobalParams(ContextImpl& context) {
-    bool changed = false;
-    for (int i = 0; i < (int) globalParamNames.size(); i++) {
-        float value = (float) context.getParameter(globalParamNames[i]);
-        if (value != globalParamValues[i])
-            changed = true;
-        globalParamValues[i] = value;
-    }
-    if (changed)
-        SetCustomExternalGlobalParams(globalParamValues);
-}
-void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& system, CudaPlatform::PlatformData& data, const Integrator& integrator) {
-    // Initialize any terms that haven't already been handled by a Force.
-    _gpuContext* gpu = data.gpu;
-    if (!data.hasBonds)
-        gpuSetBondParameters(gpu, vector<int>(), vector<int>(), vector<float>(), vector<float>());
-    if (!data.hasAngles)
-        gpuSetBondAngleParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>());
-    if (!data.hasPeriodicTorsions)
-        gpuSetDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<int>());
-    if (!data.hasRB)
-        gpuSetRbDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(),
-                vector<float>(), vector<float>(), vector<float>(), vector<float>());
-    if (!data.hasNonbonded) {
-        gpuSetCoulombParameters(gpu, (float) ONE_4PI_EPS0, vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<char>(), vector<vector<int> >(), NO_CUTOFF);
-        gpuSetLJ14Parameters(gpu, (float) ONE_4PI_EPS0, 1.0f, vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<float>());
-        if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
-            throw OpenMMException("CudaPlatform requires GBSAOBCForce and GBVIForce to be used with a NonbondedForce");
-    }
-    // Set masses.
-    int numParticles = system.getNumParticles();
-    vector<float> mass(numParticles);
-    for (int i = 0; i < numParticles; i++)
-        mass[i] = (float) system.getParticleMass(i);
-    gpuSetMass(gpu, mass);
-    // Set constraints.
-    int numConstraints = system.getNumConstraints();
-    vector<int> particle1(numConstraints);
-    vector<int> particle2(numConstraints);
-    vector<float> distance(numConstraints);
-    vector<float> invMass1(numConstraints);
-    vector<float> invMass2(numConstraints);
-    for (int i = 0; i < numConstraints; i++) {
-        int particle1Index, particle2Index;
-        double constraintDistance;
-        system.getConstraintParameters(i, particle1Index, particle2Index, constraintDistance);
-        particle1[i] = particle1Index;
-        particle2[i] = particle2Index;
-        distance[i] = (float) constraintDistance;
-        invMass1[i] = 1.0f/mass[particle1Index];
-        invMass2[i] = 1.0f/mass[particle2Index];
-    }
-    gpuSetConstraintParameters(gpu, particle1, particle2, distance, invMass1, invMass2, (float)integrator.getConstraintTolerance());
-    // Finish initialization.
-    gpuBuildThreadBlockWorkList(gpu);
-    gpuBuildExclusionList(gpu);
-    gpuBuildOutputBuffers(gpu);
-    gpuSetConstants(gpu);
-    if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
-        kClearBornSumAndForces(gpu);
-    else
-        kClearForces(gpu);
-    cudaThreadSynchronize();
-}
-CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
-}
-void CudaIntegrateVerletStepKernel::initialize(const System& system, const VerletIntegrator& integrator) {
-    cudaOpenMMInitializeIntegration(system, data, integrator);
-    prevStepSize = -1.0;
-}
-void CudaIntegrateVerletStepKernel::execute(ContextImpl& context, const VerletIntegrator& integrator) {
-    _gpuContext* gpu = data.gpu;
-    double stepSize = integrator.getStepSize();
-    if (stepSize != prevStepSize) {
-        // Initialize the GPU parameters.
-        gpuSetVerletIntegrationParameters(gpu, (float) stepSize, 0.0f);
-        gpuSetConstants(gpu);
-        prevStepSize = stepSize;
-    }
-    kVerletUpdatePart1(gpu);
-    kApplyShake(gpu);
-    kApplySettle(gpu);
-    kApplyCCMA(gpu);
-    if (data.removeCM)
-        if (data.stepCount%data.cmMotionFrequency == 0)
-            gpu->bCalculateCM = true;
-    kVerletUpdatePart2(gpu);
-    data.time += stepSize;
-    data.stepCount++;
-}
-CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
-}
-void CudaIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
-    cudaOpenMMInitializeIntegration(system, data, integrator);
-    _gpuContext* gpu = data.gpu;
-    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
-    gpuInitializeRandoms(gpu);
-    prevTemp = -1.0;
-    prevFriction = -1.0;
-    prevStepSize = -1.0;
-}
-void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const LangevinIntegrator& integrator) {
-    _gpuContext* gpu = data.gpu;
-    double temperature = integrator.getTemperature();
-    double friction = integrator.getFriction();
-    double stepSize = integrator.getStepSize();
-    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
-        // Initialize the GPU parameters.
-        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
-        gpuSetLangevinIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature, 0.0f);
-        gpuSetConstants(gpu);
-        kGenerateRandoms(gpu);
-        prevTemp = temperature;
-        prevFriction = friction;
-        prevStepSize = stepSize;
-    }
-    kLangevinUpdatePart1(gpu);
-    if (data.removeCM)
-        if (data.stepCount%data.cmMotionFrequency == 0)
-            gpu->bCalculateCM = true;
-    kLangevinUpdatePart2(gpu);
-    kApplyShake(gpu);
-    kApplySettle(gpu);
-    kApplyCCMA(gpu);
-    kSetVelocitiesFromPositions(gpu);
-    data.time += stepSize;
-    data.stepCount++;
-}
-CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
-}
-void CudaIntegrateBrownianStepKernel::initialize(const System& system, const BrownianIntegrator& integrator) {
-    cudaOpenMMInitializeIntegration(system, data, integrator);
-    _gpuContext* gpu = data.gpu;
-    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
-    gpuInitializeRandoms(gpu);
-    prevTemp = -1.0;
-    prevFriction = -1.0;
-    prevStepSize = -1.0;
-}
-void CudaIntegrateBrownianStepKernel::execute(ContextImpl& context, const BrownianIntegrator& integrator) {
-    _gpuContext* gpu = data.gpu;
-    double temperature = integrator.getTemperature();
-    double friction = integrator.getFriction();
-    double stepSize = integrator.getStepSize();
-    if (temperature != prevTemp || friction != prevFriction || stepSize != prevStepSize) {
-        // Initialize the GPU parameters.
-        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
-        gpuSetBrownianIntegrationParameters(gpu, (float) tau, (float) stepSize, (float) temperature);
-        gpuSetConstants(gpu);
-        kGenerateRandoms(gpu);
-        prevTemp = temperature;
-        prevFriction = friction;
-        prevStepSize = stepSize;
-    }
-    kBrownianUpdatePart1(gpu);
-    kApplyShake(gpu);
-    kApplySettle(gpu);
-    kApplyCCMA(gpu);
-    if (data.removeCM)
-        if (data.stepCount%data.cmMotionFrequency == 0)
-            gpu->bCalculateCM = true;
-    kBrownianUpdatePart2(gpu);
-    data.time += stepSize;
-    data.stepCount++;
-}
-CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel() {
-}
-void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, const VariableVerletIntegrator& integrator) {
-    cudaOpenMMInitializeIntegration(system, data, integrator);
-    prevErrorTol = -1.0;
-}
-void CudaIntegrateVariableVerletStepKernel::execute(ContextImpl& context, const VariableVerletIntegrator& integrator, double maxTime) {
-    _gpuContext* gpu = data.gpu;
-    double errorTol = integrator.getErrorTolerance();
-    if (errorTol != prevErrorTol) {
-        // Initialize the GPU parameters.
-        gpuSetVerletIntegrationParameters(gpu, 0.0f, (float) errorTol);
-        gpuSetConstants(gpu);
-        prevErrorTol = errorTol;
-    }
-    float maxStepSize = (float)(maxTime-data.time);
-    kSelectVerletStepSize(gpu, maxStepSize);
-    kVerletUpdatePart1(gpu);
-    kApplyShake(gpu);
-    kApplySettle(gpu);
-    kApplyCCMA(gpu);
-    if (data.removeCM)
-        if (data.stepCount%data.cmMotionFrequency == 0)
-            gpu->bCalculateCM = true;
-    kVerletUpdatePart2(gpu);
-    gpu->psStepSize->Download();
-    data.time += (*gpu->psStepSize)[0].y;
-    if ((*gpu->psStepSize)[0].y == maxStepSize)
-        data.time = maxTime; // Avoid round-off error
-    data.stepCount++;
-}
-CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKernel() {
-}
-void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
-    cudaOpenMMInitializeIntegration(system, data, integrator);
-    _gpuContext* gpu = data.gpu;
-    gpu->seed = (unsigned long) integrator.getRandomNumberSeed();
-    gpuInitializeRandoms(gpu);
-    prevTemp = -1.0;
-    prevFriction = -1.0;
-    prevErrorTol = -1.0;
-}
-void CudaIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, const VariableLangevinIntegrator& integrator, double maxTime) {
-    _gpuContext* gpu = data.gpu;
-    double temperature = integrator.getTemperature();
-    double friction = integrator.getFriction();
-    double errorTol = integrator.getErrorTolerance();
-    if (temperature != prevTemp || friction != prevFriction || errorTol != prevErrorTol) {
-        // Initialize the GPU parameters.
-        double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
-        gpuSetLangevinIntegrationParameters(gpu, (float) tau, 0.0f, (float) temperature, (float) errorTol);
-        gpuSetConstants(gpu);
-        kGenerateRandoms(gpu);
-        prevTemp = temperature;
-        prevFriction = friction;
-        prevErrorTol = errorTol;
-    }
-    float maxStepSize = (float)(maxTime-data.time);
-    kSelectLangevinStepSize(gpu, maxStepSize);
-    kLangevinUpdatePart1(gpu);
-    if (data.removeCM)
-        if (data.stepCount%data.cmMotionFrequency == 0)
-            gpu->bCalculateCM = true;
-    kLangevinUpdatePart2(gpu);
-    kApplyShake(gpu);
-    kApplySettle(gpu);
-    kApplyCCMA(gpu);
-    kSetVelocitiesFromPositions(gpu);
-    gpu->psStepSize->Download();
-    data.time += (*gpu->psStepSize)[0].y;
-    if ((*gpu->psStepSize)[0].y == maxStepSize)
-        data.time = maxTime; // Avoid round-off error
-    data.stepCount++;
-}
-CudaApplyAndersenThermostatKernel::~CudaApplyAndersenThermostatKernel() {
-    if (atomGroups != NULL)
-        delete atomGroups;
-}
-void CudaApplyAndersenThermostatKernel::initialize(const System& system, const AndersenThermostat& thermostat) {
-    _gpuContext* gpu = data.gpu;
-    gpu->seed = (unsigned long) thermostat.getRandomNumberSeed();
-    gpuInitializeRandoms(gpu);
-    prevTemp = -1.0;
-    prevFrequency = -1.0;
-    prevStepSize = -1.0;
-    // Create the arrays with the group definitions.
-    vector<vector<int> > groups = AndersenThermostatImpl::calcParticleGroups(system);
-    atomGroups = new CUDAStream<int>(system.getNumParticles(), 1, "atomGroups");
-    for (int i = 0; i < (int) groups.size(); i++) {
-        for (int j = 0; j < (int) groups[i].size(); j++)
-            (*atomGroups)[groups[i][j]] = i;
-    }
-    atomGroups->Upload();
-}
-void CudaApplyAndersenThermostatKernel::execute(ContextImpl& context) {
-    _gpuContext* gpu = data.gpu;
-    double temperature = context.getParameter(AndersenThermostat::Temperature());
-    double frequency = context.getParameter(AndersenThermostat::CollisionFrequency());
-    double stepSize = context.getIntegrator().getStepSize();
-    if (temperature != prevTemp || frequency != prevFrequency || stepSize != prevStepSize) {
-        // Initialize the GPU parameters.
-        gpuSetAndersenThermostatParameters(gpu, (float) temperature, (float) frequency);
-        gpuSetConstants(gpu);
-        kGenerateRandoms(gpu);
-        prevTemp = temperature;
-        prevFrequency = frequency;
-        prevStepSize = stepSize;
-    }
-    kCalculateAndersenThermostat(gpu, *atomGroups);
-}
-CudaApplyMonteCarloBarostatKernel::~CudaApplyMonteCarloBarostatKernel() {
-    if (moleculeAtoms != NULL)
-        delete moleculeAtoms;
-    if (moleculeStartIndex != NULL)
-        delete moleculeStartIndex;
-}
-void CudaApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) {
-}
-void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, double scale) {
-    if (!hasInitializedMolecules) {
-        hasInitializedMolecules = true;
-        // Create the arrays with the molecule definitions.
-        vector<vector<int> > molecules = context.getMolecules();
-        numMolecules = molecules.size();
-        moleculeAtoms = new CUDAStream<int>(context.getSystem().getNumParticles(), 1, "moleculeAtoms");
-        moleculeStartIndex = new CUDAStream<int>(numMolecules+1, 1, "moleculeStartIndex");
-        int index = 0;
-        for (int i = 0; i < numMolecules; i++) {
-            (*moleculeStartIndex)[i] = index;
-            for (int j = 0; j < (int) molecules[i].size(); j++)
-                (*moleculeAtoms)[index++] = molecules[i][j];
-        }
-        (*moleculeStartIndex)[numMolecules] = index;
-        moleculeAtoms->Upload();
-        moleculeStartIndex->Upload();
-    }
-    _gpuContext* gpu = data.gpu;
-    gpu->psPosqP4->CopyFrom(*gpu->psPosq4);
-    kScaleAtomCoordinates(gpu, scale, *moleculeAtoms, *moleculeStartIndex);
-    for (int i = 0; i < (int) gpu->posCellOffsets.size(); i++)
-        gpu->posCellOffsets[i] = make_int3(0, 0, 0);
-}
-void CudaApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
-    _gpuContext* gpu = data.gpu;
-    gpu->psPosq4->CopyFrom(*gpu->psPosqP4);
-}
-void CudaCalcKineticEnergyKernel::initialize(const System& system) {
-    int numParticles = system.getNumParticles();
-    masses.resize(numParticles);
-    for (int i = 0; i < numParticles; ++i)
-        masses[i] = system.getParticleMass(i);
-}
-double CudaCalcKineticEnergyKernel::execute(ContextImpl& context) {
-    // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
-    // on the CPU.
-    _gpuContext* gpu = data.gpu;
-    gpu->psVelm4->Download();
-    double energy = 0.0;
-    for (int i = 0; i < (int) masses.size(); ++i) {
-        float4 v = (*gpu->psVelm4)[i];
-        energy += masses[i]*(v.x*v.x+v.y*v.y+v.z*v.z);
-    }
-    return 0.5*energy;
-}
-void CudaRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
-    data.removeCM = true;
-    data.cmMotionFrequency = force.getFrequency();
-}
-void CudaRemoveCMMotionKernel::execute(ContextImpl& context) {
-}

--- a/platforms/cuda/src/kernels/cudatypes.h
+++ b/platforms/cuda/src/kernels/cudatypes.h
-#ifndef CUDATYPES_H
+Vim: Warning: Output is not to a terminal
-#define CUDATYPES_H
+[?1049h[?1h=[1;59r[?12;25h[?12l[?25h[27m[m[H[2J[?25l[59;1H"svn-commit.tmp" 15L, 601C[1;1H[33m  1 
+  2 [m[32m--This line, and those below, will be ignored--[m
-/* -------------------------------------------------------------------------- *
+[33m  3 
- *                                   OpenMM                                   *
+  4 [m[1m[35mM    plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp[m
- * -------------------------------------------------------------------------- *
+[33m  5 [m[1m[35mM    plugins/freeEnergy/platforms/reference/src/gbsa/CpuGBVISoftcore.cpp[m
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+[33m  6 [m[1m[35mM    openmmapi/include/openmm/GBVIForce.h[m
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+[33m  7 [m[1m[35mM    openmmapi/src/GBVIForce.cpp[m
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+[33m  8 [m[1m[35mM    olla/src/Platform.cpp[m
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+[33m  9 [m[1m[35mM    platforms/opencl/src/OpenCLContext.h[m
- *                                                                            *
+[33m 10 [m[1m[35mM    platforms/cuda/src/CudaKernels.cpp[m
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+[33m 11 [m[1m[35mM    platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu[m
- * Authors: Scott Le Grand, Peter Eastman                                     *
+[33m 12 [m[1m[35mM    platforms/cuda/src/kernels/gputypes.h[m
- * Contributors:                                                              *
+[33m 13 [m[1m[35mM    platforms/cuda/src/kernels/cudatypes.h[m
- *                                                                            *
+[33m 14 [m[1m[35mM    platforms/cuda/src/kernels/kForces.cu[m
- * This program is free software: you can redistribute it and/or modify       *
+[33m 15 [m[1m[35mM    platforms/cuda/src/kernels/gpu.cpp[m
- * it under the terms of the GNU Lesser General Public License as published   *
+[1m[34m~                                                                                                                                                                                                                                         [17;1H~                                                                                                                                                                                                                                         [18;1H~                                                                                                                                                                                                                                         [19;1H~                                                                                                                                                                                                                                         [20;1H~                                                                                                                                                                                                                                         [21;1H~                                                                                                                                                                                                                                         [22;1H~                                                                                                                                                                                                                                         [23;1H~                                                                                                                                                                                                                                         [24;1H~                                                                       
                                                                                                                                                                  [25;1H~                                                                                                                                                                                                                                         [26;1H~                                                                                                                                                                                                                                         [27;1H~                                                                                                                                                                                                                                         [28;1H~                                                                                                                                                                                                                                         [29;1H~                                                                                                                                                                                                                                         [30;1H~                                                                                                                                                                                                                                         [31;1H~                                                                                                                                                                                                                                         [32;1H~                                                                                                                                                       
                                                                                  [33;1H~                                                                                                                                                                                                                                         [34;1H~                                                                                                                                                                                                                                         [35;1H~                                                                                                                                                                                                                                         [36;1H~                                                                                                                                                                                                                                         [37;1H~                                                                                                                                                                                                                                         [38;1H~                                                                                                                                                                                                                                         [39;1H~                                                                                                                                                                                                                                         [40;1H~                                                                                                                                                                                                                                       
  [41;1H~                                                                                                                                                                                                                                         [42;1H~                                                                                                                                                                                                                                         [43;1H~                                                                                                                                                                                                                                         [44;1H~                                                                                                                                                                                                                                         [45;1H~                                                                                                                                                                                                                                         [46;1H~                                                                                                                                                                                                                                         [47;1H~                                                                                                                                                                                                                                         [48;1H~                                                                                                                                                                                                                                         [49;1H~                                                                       
                                                                                                                                                                  [50;1H~                                                                                                                                                                                                                                         [51;1H~                                                                                                                                                                                                                                         [52;1H~                                                                                                                                                                                                                                         [53;1H~                                                                                                                                                                                                                                         [54;1H~                                                                                                                                                                                                                                         [55;1H~                                                                                                                                                                                                                                         [56;1H~                                                                                                                                                                                                                                         [57;1H~                                                                                                                                                       
                                                                                  [58;1H~                                                                                                                                                                                                                                         [m[59;217H1,0-1[9CAll[1;5H[?12l[?25h[?25l[59;1H[K[59;1H:[?12l[?25hq![?25l[59;1H[K[59;1H[?1l>[?12l[?25h[?1049l
- * by the Free Software Foundation, either version 3 of the License, or       *
+Log message unchanged or not specified
- * (at your option) any later version.                                        *
+a)bort, c)ontinue, e)dit
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include <stdarg.h>
-#include <limits>
-#include <iostream>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string>
-#include <cuda.h>
-#include <cuda_runtime_api.h>
-#include <cufft.h>
-#include <builtin_types.h>
-#include <vector_functions.h>
-#define RTERROR(status, s) \
-    if (status != cudaSuccess) { \
-        printf("%s %s\n", s, cudaGetErrorString(status)); \
-        exit(-1); \
-    }
-#define LAUNCHERROR(s) \
-    { \
-        cudaError_t status = cudaGetLastError(); \
-        if (status != cudaSuccess) { \
-            printf("Error: %s launching kernel %s\n", cudaGetErrorString(status), s); \
-            exit(-1); \
-        } \
-    }
-// Pure virtual class to define an interface for objects resident both on GPU and CPU
-struct SoADeviceObject {
-    virtual void Allocate() = 0;
-    virtual void Deallocate() = 0;
-    virtual void Upload() = 0;
-    virtual void Download() = 0;
-};
-template <typename T>
-struct CUDAStream : public SoADeviceObject
-{
-    unsigned int    _length;
-    unsigned int    _subStreams;
-    unsigned int    _stride;
-    T**             _pSysStream;
-    T**             _pDevStream;
-    T*              _pSysData;
-    T*              _pDevData;
-    std::string     _name;
-    CUDAStream(int length, int subStreams = 1, std::string name="");
-    CUDAStream(unsigned int length, unsigned int subStreams = 1, std::string name="");
-    CUDAStream(unsigned int length, int subStreams = 1, std::string name="");
-    CUDAStream(int length, unsigned int subStreams = 1, std::string name="");
-    virtual ~CUDAStream();
-    void Allocate();
-    void Deallocate();
-    void Upload();
-    void Download();
-    void CopyFrom(const CUDAStream<T>& src);
-    void Collapse(unsigned int newstreams = 1, unsigned int interleave = 1);
-    T& operator[](int index);
-};
-float CompareStreams(CUDAStream<float>& s1, CUDAStream<float>& s2, float tolerance, unsigned int maxindex = 0);
-template <typename T>
-CUDAStream<T>::CUDAStream(int length, unsigned int subStreams, std::string name) : _length(length), _subStreams(subStreams), _stride((length + 0xf) & 0xfffffff0), _name(name)
-{
-    Allocate();   
-}
-template <typename T>
-CUDAStream<T>::CUDAStream(unsigned int length, int subStreams, std::string name) : _length(length), _subStreams(subStreams), _stride((length + 0xf) & 0xfffffff0), _name(name)
-{
-    Allocate();   
-}
-template <typename T>
-CUDAStream<T>::CUDAStream(unsigned int length, unsigned int subStreams, std::string name) : _length(length), _subStreams(subStreams), _stride((length + 0xf) & 0xfffffff0), _name(name)
-{
-    Allocate();   
-}
-template <typename T>
-CUDAStream<T>::CUDAStream(int length, int subStreams, std::string name) : _length(length), _subStreams(subStreams), _stride((length + 0xf) & 0xfffffff0), _name(name)
-{
-    Allocate();   
-}
-template <typename T>
-CUDAStream<T>::~CUDAStream()
-{
-    Deallocate();
-}
-template <typename T>
-void CUDAStream<T>::Allocate()
-{
-    cudaError_t status;
-    _pSysStream =   new T*[_subStreams];
-    _pDevStream =   new T*[_subStreams];
-    _pSysData =     new T[_subStreams * _stride];
-    status = cudaMalloc((void **) &_pDevData, _stride * _subStreams * sizeof(T));
-    RTERROR(status, (_name+": cudaMalloc in CUDAStream::Allocate failed").c_str());
-    for (unsigned int i = 0; i < _subStreams; i++)
-    {
-        _pSysStream[i] = _pSysData + i * _stride;
-        _pDevStream[i] = _pDevData + i * _stride;
-    }
-}
-template <typename T>
-void CUDAStream<T>::Deallocate()
-{
-    cudaError_t status;
-    delete[] _pSysStream;
-    _pSysStream = NULL;
-    delete[] _pDevStream;
-    _pDevStream = NULL;
-    delete[] _pSysData;
-    _pSysData = NULL;
-    status = cudaFree(_pDevData);
-    RTERROR(status, (_name+": cudaFree in CUDAStream::Deallocate failed").c_str());
-}
-template <typename T>
-void CUDAStream<T>::Upload()
-{
-    cudaError_t status;
-    status = cudaMemcpy(_pDevData, _pSysData, _stride * _subStreams * sizeof(T), cudaMemcpyHostToDevice);
-    RTERROR(status, (_name+": cudaMemcpy in CUDAStream::Upload failed").c_str());
-}
-template <typename T>
-void CUDAStream<T>::Download()
-{
-    cudaError_t status;
-    status = cudaMemcpy(_pSysData, _pDevData, _stride * _subStreams * sizeof(T), cudaMemcpyDeviceToHost);
-    RTERROR(status, (_name+": cudaMemcpy in CUDAStream::Download failed").c_str());
-}
-template <typename T>
-void CUDAStream<T>::CopyFrom(const CUDAStream<T>& src)
-{
-    cudaError_t status;
-    status = cudaMemcpy(_pDevData, src._pDevData, _stride * _subStreams * sizeof(T), cudaMemcpyDeviceToDevice);
-    RTERROR(status, (_name+": cudaMemcpy in CUDAStream::Copy failed").c_str());
-}
-template <typename T>
-void CUDAStream<T>::Collapse(unsigned int newstreams, unsigned int interleave)
-{
-    T* pTemp = new T[_subStreams * _stride];
-    unsigned int stream = 0;
-    unsigned int pos = 0;
-    unsigned int newstride = _stride * _subStreams / newstreams;
-    unsigned int newlength = _length * _subStreams / newstreams;
-    // Copy data into new format
-    for (unsigned int i = 0; i < _length; i++)
-    {
-        for (unsigned int j = 0; j < _subStreams; j++)
-        {
-            pTemp[stream * newstride + pos] = _pSysStream[j][i];
-            stream++;
-            if (stream == newstreams)
-            {
-                stream = 0;
-                pos++;
-            }
-        }
-    }
-    // Remap stream pointers;
-    for (unsigned int i = 0; i < newstreams; i++)
-    {
-        _pSysStream[i] = _pSysData + i * newstride;
-        _pDevStream[i] = _pDevData + i * newstride;
-    }
-    // Copy data back intro original stream
-    for (unsigned int i = 0; i < newlength; i++)
-        for (unsigned int j = 0; j < newstreams; j++)
-            _pSysStream[j][i] = pTemp[j * newstride + i];
-    _stride = newstride;
-    _length = newlength;
-    _subStreams = newstreams;
-    delete[] pTemp;
-}
-template <typename T>
-T& CUDAStream<T>::operator[](int index)
-{
-    return _pSysData[index];
-}
-static const unsigned int GRID = 32;
-static const unsigned int GRIDBITS = 5;
-static const int G8X_BLOCKS_PER_SM                      = 1;
-static const int GT2XX_BLOCKS_PER_SM                    = 1;
-static const int GF1XX_BLOCKS_PER_SM                    = 1;
-static const int G8X_NONBOND_THREADS_PER_BLOCK          = 256;
-static const int GT2XX_NONBOND_THREADS_PER_BLOCK        = 320;
-static const int GF1XX_NONBOND_THREADS_PER_BLOCK        = 768;
-//static const int GF1XX_NONBOND_THREADS_PER_BLOCK        = 768;
-static const int G8X_BORNFORCE2_THREADS_PER_BLOCK       = 256;
-static const int GT2XX_BORNFORCE2_THREADS_PER_BLOCK     = 320;
-static const int GF1XX_BORNFORCE2_THREADS_PER_BLOCK     = 768;
-//static const int GF1XX_BORNFORCE2_THREADS_PER_BLOCK     = 768;
-static const int G8X_SHAKE_THREADS_PER_BLOCK            = 128;
-static const int GT2XX_SHAKE_THREADS_PER_BLOCK          = 256;
-static const int GF1XX_SHAKE_THREADS_PER_BLOCK          = 512;
-static const int G8X_UPDATE_THREADS_PER_BLOCK           = 192;
-static const int GT2XX_UPDATE_THREADS_PER_BLOCK         = 384;
-static const int GF1XX_UPDATE_THREADS_PER_BLOCK         = 768;
-static const int G8X_LOCALFORCES_THREADS_PER_BLOCK      = 192;
-static const int GT2XX_LOCALFORCES_THREADS_PER_BLOCK    = 384;
-static const int GF1XX_LOCALFORCES_THREADS_PER_BLOCK    = 768;
-static const int G8X_THREADS_PER_BLOCK                  = 256;
-static const int GT2XX_THREADS_PER_BLOCK                = 256;
-static const int GF1XX_THREADS_PER_BLOCK                = 512;
-static const int G8X_RANDOM_THREADS_PER_BLOCK           = 256;
-static const int GT2XX_RANDOM_THREADS_PER_BLOCK         = 384;
-static const int GF1XX_RANDOM_THREADS_PER_BLOCK         = 768;
-static const int G8X_NONBOND_WORKUNITS_PER_SM           = 220;
-static const int GT2XX_NONBOND_WORKUNITS_PER_SM         = 256;
-static const int GF1XX_NONBOND_WORKUNITS_PER_SM         = 768;
-static const unsigned int MAX_STACK_SIZE = 8;
-static const unsigned int MAX_TABULATED_FUNCTIONS = 4;
-static const float PI = 3.14159265358979323846f;
-static const int PME_ORDER = 5;
-enum CudaNonbondedMethod
-{
-    NO_CUTOFF,
-    CUTOFF,
-    PERIODIC,
-    EWALD,
-    PARTICLE_MESH_EWALD
-};
-enum ExpressionOp {
-    VARIABLE0 = 0, VARIABLE1, VARIABLE2, VARIABLE3, VARIABLE4, VARIABLE5, VARIABLE6, VARIABLE7, VARIABLE8, MULTIPLY, DIVIDE, ADD, SUBTRACT, POWER, MULTIPLY_CONSTANT, POWER_CONSTANT, ADD_CONSTANT,
-        GLOBAL, CONSTANT, CUSTOM, CUSTOM_DERIV, NEGATE, RECIPROCAL, SQRT, EXP, LOG, SQUARE, CUBE, STEP, SIN, COS, SEC, CSC, TAN, COT, ASIN, ACOS, ATAN, SINH, COSH, TANH, ERF, ERFC,
-        MIN, MAX, ABS
-};
-template<int SIZE>
-struct Expression {
-    int op[SIZE];
-    float arg[SIZE];
-    int length, stackSize;
-};
-struct cudaGmxSimulation {
-    // Constants
-    unsigned int    atoms;                          // Number of atoms
-    unsigned int    paddedNumberOfAtoms;            // Padded number of atoms
-    unsigned int    blocks;                         // Number of blocks to launch across linear kernels
-    unsigned int    blocksPerSM;                    // Number of blocks per share memory
-    unsigned int    nonbond_blocks;                 // Number of blocks to launch across CDLJ and Born Force Part1
-    unsigned int    bornForce2_blocks;              // Number of blocks to launch across Born Force 2
-    unsigned int    interaction_blocks;             // Number of blocks to launch when identifying interacting tiles
-    unsigned int    threads_per_block;              // Threads per block to launch
-    unsigned int    nonbond_threads_per_block;      // Threads per block in nonbond kernel calls
-    unsigned int    bornForce2_threads_per_block;   // Threads per block in nonbond kernel calls
-    unsigned int    max_update_threads_per_block;   // Maximum threads per block in update kernel calls
-    unsigned int    update_threads_per_block;       // Threads per block in update kernel calls
-    unsigned int    bf_reduce_threads_per_block;    // Threads per block in Born Force reduction calls
-    unsigned int    bsf_reduce_threads_per_block;   // Threads per block in Born Sum And Forces reduction calls
-    unsigned int    max_shake_threads_per_block;    // Maximum threads per block in shake kernel calls
-    unsigned int    shake_threads_per_block;        // Threads per block in shake kernel calls
-    unsigned int    settle_threads_per_block;       // Threads per block in SETTLE kernel calls
-    unsigned int    ccma_threads_per_block;         // Threads per block in CCMA kernel calls
-    unsigned int    max_localForces_threads_per_block;  // Threads per block in local forces kernel calls
-    unsigned int    localForces_threads_per_block;  // Threads per block in local forces kernel calls
-    unsigned int    random_threads_per_block;       // Threads per block in RNG kernel calls
-    unsigned int    interaction_threads_per_block;  // Threads per block when identifying interacting tiles
-    unsigned int    custom_exception_threads_per_block; // Threads per block in custom nonbonded exception kernel calls
-    unsigned int    customExpressionStackSize;      // Stack size for evaluating custom nonbonded forces
-    unsigned int    workUnits;                      // Number of work units
-    unsigned int*   pWorkUnit;                      // Pointer to work units
-    unsigned int*   pInteractingWorkUnit;           // Pointer to work units that have interactions
-    unsigned int*   pInteractionFlag;               // Flags for which work units have interactions
-    float2*         pStepSize;                      // The size of the previous and current time steps
-    float*          pLangevinParameters;            // Parameters used for Langevin integration
-    float           errorTol;                       // Error tolerance for selecting the step size
-    size_t*         pInteractionCount;              // A count of the number of work units which have interactions
-    unsigned int    nonbond_workBlock;              // Number of work units running simultaneously per block in CDLJ and Born Force Part 1
-    unsigned int    bornForce2_workBlock;           // Number of work units running second half of Born Forces calculation
-    unsigned int    workUnitsPerSM;                 // Number of workblocks per SM
-    unsigned int    nbWorkUnitsPerBlock;            // Number of work units assigned to each nonbond block
-    unsigned int    nbWorkUnitsPerBlockRemainder;   // Remainder of work units to assign across lower numbered nonbond blocks
-    unsigned int    bf2WorkUnitsPerBlock;           // Number of work units assigned to each bornForce2 block
-    unsigned int    bf2WorkUnitsPerBlockRemainder;  // Remainder of work units to assign across lower numbered bornForce2 blocks
-    unsigned int    stride;                         // Atomic attributes stride
-    unsigned int    stride2;                        // Atomic attributes stride x 2
-    unsigned int    stride3;                        // Atomic attributes stride x 3
-    unsigned int    stride4;                        // Atomic attributes stride x 4
-    unsigned int    nonbondOutputBuffers;           // Nonbond output buffers per nonbond call
-    unsigned int    outputBuffers;                  // Number of output buffers
-    unsigned int    energyOutputBuffers;            // Number of energy output buffers
-    float           bigFloat;                       // Floating point value used as a flag for Shaken atoms 
-    float           epsfac;                         // Epsilon factor for CDLJ calculations
-    CudaNonbondedMethod nonbondedMethod;            // How to handle nonbonded interactions
-    CudaNonbondedMethod customNonbondedMethod;      // How to handle custom nonbonded interactions
-    float           nonbondedCutoff;                // Cutoff distance for nonbonded interactions
-    float           nonbondedCutoffSqr;             // Square of the cutoff distance for nonbonded interactions
-    float           periodicBoxSizeX;               // The X dimension of the periodic box
-    float           periodicBoxSizeY;               // The Y dimension of the periodic box
-    float           periodicBoxSizeZ;               // The Z dimension of the periodic box
-    float           invPeriodicBoxSizeX;            // The 1 over the X dimension of the periodic box
-    float           invPeriodicBoxSizeY;            // The 1 over the Y dimension of the periodic box
-    float           invPeriodicBoxSizeZ;            // The 1 over the Z dimension of the periodic box
-    float           recipBoxSizeX;                  // The X dimension of the reciprocal box for Ewald summation
-    float           recipBoxSizeY;                  // The Y dimension of the reciprocal box for Ewald summation
-    float           recipBoxSizeZ;                  // The Z dimension of the reciprocal box for Ewald summation
-    float           cellVolume;                     // Ewald parameter alpha (a.k.a. kappa)
-    float           alphaEwald;                     // Ewald parameter alpha (a.k.a. kappa)
-    float           factorEwald;                    // - 1 ( 4 * alphaEwald * alphaEwald)
-    int             kmaxX;                          // Maximum number of reciprocal vectors in the X direction
-    int             kmaxY;                          // Maximum number of reciprocal vectors in the Y direction
-    int             kmaxZ;                          // Maximum number of reciprocal vectors in the Z direction
-    float           reactionFieldK;                 // Constant for reaction field correction
-    float           reactionFieldC;                 // Constant for reaction field correction
-    float           probeRadius;                    // SASA probe radius
-    float           surfaceAreaFactor;              // ACE approximation surface area factor
-    float           electricConstant;               // ACE approximation electric constant
-    float           forceConversionFactor;          // kJ to kcal force conversion factor
-    float           preFactor;                      // Born electrostatic pre-factor
-    float           dielectricOffset;               // Born dielectric offset
-    float           alphaOBC;                       // OBC alpha factor
-    float           betaOBC;                        // OBC beta factor
-    float           gammaOBC;                       // OBC gamma factor
-    float           deltaT;                         // Molecular dynamics deltaT constant
-    float           oneOverDeltaT;                  // 1/deltaT
-    float           T;                              // Temperature
-    float           kT;                             // Boltzmann's constant times T
-    float           noiseAmplitude;                 // The magnitude of the noise for Brownian dynamics
-    float           tau;                            // Inverse friction for Langevin or Brownian dynamics
-    float           tauDeltaT;                      // tau*deltaT
-    float           collisionFrequency;             // Collision frequency for Andersen thermostat
-    float2*         pObcData;                       // Pointer to fixed Born data
-    float4*         pGBVIData;                      // Pointer to fixed Born data for GB/VI algorithm
-    float2*         pAttr;                          // Pointer to additional atom attributes (sig, eps)
-    float4*         pCustomParams;                  // Pointer to atom parameters for custom nonbonded force
-    unsigned int    customExceptions;               // Number of custom nonbonded exceptions
-    unsigned int    customParameters;               // Number of parameters for custom nonbonded interactions
-    int4*           pCustomBondID;                  // Atom indices for custom bonds
-    float4*         pCustomBondParams;              // Parameters for custom bonds
-    unsigned int    customBonds;                    // Number of custom bonds
-    unsigned int    customBondParameters;           // Number of parameters for custom bonds
-    int4*           pCustomAngleID1;                // Atom indices for custom angles
-    int2*           pCustomAngleID2;                // Atom indices for custom angles
-    float4*         pCustomAngleParams;             // Parameters for custom angles
-    unsigned int    customAngles;                   // Number of custom angles
-    unsigned int    customAngleParameters;          // Number of parameters for custom angles
-    int4*           pCustomTorsionID1;              // Atom indices for custom torsions
-    int4*           pCustomTorsionID2;              // Atom indices for custom torsions
-    float4*         pCustomTorsionParams;           // Parameters for custom torsions
-    unsigned int    customTorsions;                 // Number of custom torsions
-    unsigned int    customTorsionParameters;        // Number of parameters for custom torsions
-    int*            pCustomExternalID;              // Atom indices for custom external force
-    float4*         pCustomExternalParams;          // Parameters for custom external force
-    unsigned int    customExternals;                // Number of particles for custom external force
-    unsigned int    customExternalParameters;       // Number of parameters for custom external force
-    float4*         pTabulatedFunctionCoefficients[MAX_TABULATED_FUNCTIONS]; // The spline coefficients for each tabulated function
-    float4*         pTabulatedFunctionParams;       // The min, max, and spacing for each tabulated function
-    float2*         pEwaldCosSinSum;                // Pointer to the cos/sin sums (ewald)
-    float*          pTabulatedErfc;                 // Tabulated values for erfc()
-    int             tabulatedErfcSize;              // The number of tabulated values for erfc()
-    float           tabulatedErfcScale;             // Scale factor for the argument to erfc()
-    int3            pmeGridSize;                    // The dimensions of the grid for particle mesh Ewald
-    int3            pmeGroupSize;                   // The dimensions of the groups used in charge spreading for PME
-    cufftComplex*   pPmeGrid;                       // Grid points for particle mesh Ewald
-    float*          pPmeBsplineModuli[3];
-    float4*         pPmeBsplineTheta;
-    float4*         pPmeBsplineDtheta;
-    int*            pPmeAtomRange;                  // The range of sorted atoms at each grid point
-    int2*           pPmeAtomGridIndex;              // The grid point each atom is at
-    unsigned int    bonds;                          // Number of bonds
-    int4*           pBondID;                        // Bond atom and output buffer IDs
-    float2*         pBondParameter;                 // Bond parameters
-    unsigned int    bond_angles;                    // Number of bond angles
-    int4*           pBondAngleID1;                  // Bond angle atom and first output buffer IDs
-    int2*           pBondAngleID2;                  // Bond angle output buffer IDs
-    float2*         pBondAngleParameter;            // Bond angle parameters
-    unsigned int    dihedrals;                      // Number of dihedrals
-    int4*           pDihedralID1;                   // Dihedral IDs
-    int4*           pDihedralID2;                   // Dihedral output buffer IDs
-    float4*         pDihedralParameter;             // Dihedral parameters
-    unsigned int    rb_dihedrals;                   // Number of Ryckaert Bellemans dihedrals
-    int4*           pRbDihedralID1;                 // Ryckaert Bellemans Dihedral IDs
-    int4*           pRbDihedralID2;                 // Ryckaert Bellemans Dihedral output buffer IDs
-    float4*         pRbDihedralParameter1;          // Ryckaert Bellemans Dihedral parameters
-    float2*         pRbDihedralParameter2;          // Ryckaert Bellemans Dihedral parameters
-    unsigned int    LJ14s;                          // Number of Lennard Jones 1-4 interactions
-    int4*           pLJ14ID;                        // Lennard Jones 1-4 atom and output buffer IDs
-    float4*         pLJ14Parameter;                 // Lennard Jones 1-4 parameters
-    float           inverseTotalMass;               // Used in linear momentum removal
-    unsigned int    ShakeConstraints;               // Total number of Shake constraints
-    unsigned int    settleConstraints;              // Total number of Settle constraints
-    unsigned int    ccmaConstraints;                // Total number of CCMA constraints.
-    unsigned int    rigidClusters;                  // Total number of rigid clusters
-    unsigned int    maxRigidClusterSize;            // The size of the largest rigid cluster
-    unsigned int    clusterShakeBlockSize;          // The number of threads to process each rigid cluster
-    unsigned int    maxShakeIterations;             // Maximum shake iterations
-    unsigned int    degreesOfFreedom;               // Number of degrees of freedom in system
-    float           shakeTolerance;                 // Shake tolerance
-    float           InvMassJ;                       // Shake inverse mass for hydrogens
-    int*            pNonShakeID;                    // Not Shaking atoms
-    int4*           pShakeID;                       // Shake atoms and phase
-    float4*         pShakeParameter;                // Shake parameters
-    int4*           pSettleID;                      // Settle atoms
-    float2*         pSettleParameter;               // Settle parameters
-    unsigned int*   pExclusion;                     // Nonbond exclusion data
-    unsigned int*   pExclusionIndex;                // Index of exclusion data for each work unit
-    unsigned int    bond_offset;                    // Offset to end of bonds
-    unsigned int    bond_angle_offset;              // Offset to end of bond angles
-    unsigned int    dihedral_offset;                // Offset to end of dihedrals
-    unsigned int    rb_dihedral_offset;             // Offset to end of Ryckaert Bellemans dihedrals
-    unsigned int    LJ14_offset;                    // Offset to end of Lennard Jones 1-4 parameters
-    int*            pAtomIndex;                     // The original index of each atom
-    float4*         pGridBoundingBox;               // The size of each grid cell
-    float4*         pGridCenter;                    // The center of each grid cell
-    int2*           pCcmaAtoms;                     // The atoms connected by each CCMA constraint
-    float4*         pCcmaDistance;                  // The displacement vector (x, y, z) and constraint distance (w) for each CCMA constraint
-    float*          pCcmaDelta1;                    // Workspace for CCMA
-    float*          pCcmaDelta2;                    // Workspace for CCMA
-    int*            pCcmaAtomConstraints;           // The indices of constraints involving each atom
-    int*            pCcmaNumAtomConstraints;        // The number of constraints involving each atom
-    int*            ccmaConvergedDeviceMarker;      // Device memory used to communicate that CCMA has converged
-    float*          pCcmaReducedMass;               // The reduced mass for each CCMA constraint
-    unsigned int*   pConstraintMatrixColumn;        // The column of each element in the constraint matrix.
-    float*          pConstraintMatrixValue;         // The value of each element in the constraint matrix.
-    // Mutable stuff
-    float4*         pPosq;                          // Pointer to atom positions and charges
-    float4*         pPosqP;                         // Pointer to mid-integration atom positions
-    float4*         pOldPosq;                       // Pointer to old atom positions
-    float4*         pVelm4;                         // Pointer to atom velocity and inverse mass
-    float4*         pForce4;                        // Pointer to force data
-    float*          pEnergy;                        // Pointer to energy output buffer
-    float*          pBornForce;                     // Pointer to Born force data
-    float*	    pBornSum;                       // Pointer to Born Radii calculation output buffers
-    float*	    pBornRadii;                     // Pointer to Born Radii
-    float*          pObcChain;                      // Pointer to OBC chain data
-    float4*         pLinearMomentum;                // Pointer to linear momentum
-    // Random numbers
-    float4*         pRandom4;                       // Pointer to 4 random numbers
-    float2*         pRandom2;                       // Pointer to 2 random numbers
-    uint4*          pRandomSeed;                    // Pointer to random seeds
-    int*            pRandomPosition;                // Pointer to random number positions
-    unsigned int    randoms;                        // Number of randoms
-    unsigned int    totalRandoms;                   // Number of randoms plus overflow.
-    unsigned int    randomIterations;               // Number of iterations before regenerating randoms
-    unsigned int    randomFrames;                   // Number of frames of random numbers
-};
-struct Vectors {
-    float3 v0;
-    float3 v1;
-    float3 v2;
-};
-#endif

--- a/platforms/cuda/src/kernels/gpu.cpp
+++ b/platforms/cuda/src/kernels/gpu.cpp
-/* -------------------------------------------------------------------------- *
+Vim: Warning: Output is not to a terminal
- *                                   OpenMM                                   *
+[?1049h[?1h=[1;59r[?12;25h[?12l[?25h[27m[m[H[2J[?25l[59;1H"svn-commit.tmp" 15L, 601C[1;1H[33m  1 
- * -------------------------------------------------------------------------- *
+  2 [m[32m--This line, and those below, will be ignored--[m
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+[33m  3 
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+  4 [m[1m[35mM    plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp[m
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+[33m  5 [m[1m[35mM    plugins/freeEnergy/platforms/reference/src/gbsa/CpuGBVISoftcore.cpp[m
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+[33m  6 [m[1m[35mM    openmmapi/include/openmm/GBVIForce.h[m
- *                                                                            *
+[33m  7 [m[1m[35mM    openmmapi/src/GBVIForce.cpp[m
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+[33m  8 [m[1m[35mM    olla/src/Platform.cpp[m
- * Authors: Scott Le Grand, Peter Eastman                                     *
+[33m  9 [m[1m[35mM    platforms/opencl/src/OpenCLContext.h[m
- * Contributors:                                                              *
+[33m 10 [m[1m[35mM    platforms/cuda/src/CudaKernels.cpp[m
- *                                                                            *
+[33m 11 [m[1m[35mM    platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu[m
- * This program is free software: you can redistribute it and/or modify       *
+[33m 12 [m[1m[35mM    platforms/cuda/src/kernels/gputypes.h[m
- * it under the terms of the GNU Lesser General Public License as published   *
+[33m 13 [m[1m[35mM    platforms/cuda/src/kernels/cudatypes.h[m
- * by the Free Software Foundation, either version 3 of the License, or       *
+[33m 14 [m[1m[35mM    platforms/cuda/src/kernels/kForces.cu[m
- * (at your option) any later version.                                        *
+[33m 15 [m[1m[35mM    platforms/cuda/src/kernels/gpu.cpp[m
- *                                                                            *
+[1m[34m~                                                                                                                                                                                                                                         [17;1H~                                                                                                                                                                                                                                         [18;1H~                                                                                                                                                                                                                                         [19;1H~                                                                                                                                                                                                                                         [20;1H~                                                                                                                                                                                                                                         [21;1H~                                                                                                                                                                                                                                         [22;1H~                                                                                                                                                                                                                                         [23;1H~                                                                                                                                                                                                                                         [24;1H~                                                                       
                                                                                                                                                                  [25;1H~                                                                                                                                                                                                                                         [26;1H~                                                                                                                                                                                                                                         [27;1H~                                                                                                                                                                                                                                         [28;1H~                                                                                                                                                                                                                                         [29;1H~                                                                                                                                                                                                                                         [30;1H~                                                                                                                                                                                                                                         [31;1H~                                                                                                                                                                                                                                         [32;1H~                                                                                                                                                       
                                                                                  [33;1H~                                                                                                                                                                                                                                         [34;1H~                                                                                                                                                                                                                                         [35;1H~                                                                                                                                                                                                                                         [36;1H~                                                                                                                                                                                                                                         [37;1H~                                                                                                                                                                                                                                         [38;1H~                                                                                                                                                                                                                                         [39;1H~                                                                                                                                                                                                                                         [40;1H~                                                                                                                                                                                                                                       
  [41;1H~                                                                                                                                                                                                                                         [42;1H~                                                                                                                                                                                                                                         [43;1H~                                                                                                                                                                                                                                         [44;1H~                                                                                                                                                                                                                                         [45;1H~                                                                                                                                                                                                                                         [46;1H~                                                                                                                                                                                                                                         [47;1H~                                                                                                                                                                                                                                         [48;1H~                                                                                                                                                                                                                                         [49;1H~                                                                       
                                                                                                                                                                  [50;1H~                                                                                                                                                                                                                                         [51;1H~                                                                                                                                                                                                                                         [52;1H~                                                                                                                                                                                                                                         [53;1H~                                                                                                                                                                                                                                         [54;1H~                                                                                                                                                                                                                                         [55;1H~                                                                                                                                                                                                                                         [56;1H~                                                                                                                                                                                                                                         [57;1H~                                                                                                                                                       
                                                                                  [58;1H~                                                                                                                                                                                                                                         [m[59;217H1,0-1[9CAll[1;5H[?12l[?25h[?25l[59;1H[K[59;1H:[?12l[?25hq![?25l[59;1H[K[59;1H[?1l>[?12l[?25h[?1049l
- * This program is distributed in the hope that it will be useful,            *
+Log message unchanged or not specified
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+a)bort, c)ontinue, e)dit
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include <stdio.h>
-#include <string.h>
-#include <cuda.h>
-#include <vector_functions.h>
-#include <cstdlib>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <cmath>
-#include <map>
-#include <set>
-#include <algorithm>
-#ifdef WIN32
-  #define _USE_MATH_DEFINES /* M_PI */
-  #include <math.h>
-  #include <windows.h>
-#else
-  #include <stdint.h>
-#endif
-using namespace std;
-#include "gputypes.h"
-#include "cudaKernels.h"
-#include "hilbert.h"
-#include "openmm/OpenMMException.h"
-#include "openmm/internal/SplineFitter.h"
-#include "quern.h"
-#include "Lepton.h"
-#include "rng.h"
-#include "../CudaForceInfo.h"
-// In case we're using some primitive version of Visual Studio this will
-// make sure that erf() and erfc() are defined.
-#include "openmm/internal/MSVC_erfc.h"
-#include "openmm/internal/windowsExport.h"
-using OpenMM::OpenMMException;
-using Lepton::Operation;
-struct ShakeCluster {
-    int centralID;
-    int peripheralID[3];
-    int size;
-    bool valid;
-    float distance;
-    float centralInvMass, peripheralInvMass;
-    ShakeCluster() : valid(true) {
-    }
-    ShakeCluster(int centralID, float invMass) : centralID(centralID), centralInvMass(invMass), size(0), valid(true) {
-    }
-    void addAtom(int id, float dist, float invMass) {
-        if (size == 3 || (size > 0 && dist != distance) || (size > 0 && invMass != peripheralInvMass))
-            valid = false;
-        else {
-            peripheralID[size++] = id;
-            distance = dist;
-            peripheralInvMass = invMass;
-        }
-    }
-};
-struct Constraint
-{
-    Constraint(int atom1, int atom2, float distance2) : atom1(atom1), atom2(atom2), distance2(distance2) {
-    }
-    int atom1, atom2;
-    float distance2;
-};
-struct ConstraintOrderer : public binary_function<int, int, bool> {
-    const vector<int>& atom1;
-    const vector<int>& atom2;
-    ConstraintOrderer(const vector<int>& atom1, const vector<int>& atom2) : atom1(atom1), atom2(atom2) {
-    }
-    bool operator()(int x, int y) {
-        if (atom1[x] != atom1[y])
-            return atom1[x] < atom1[y];
-        return atom2[x] < atom2[y];
-    }
-};
-struct Molecule {
-    vector<int> atoms;
-    vector<int> constraints;
-    vector<vector<int> > groups;
-};
-static const float dielectricOffset         =    0.009f;
-static const float probeRadius              =    0.14f;
-static const float forceConversionFactor    =    0.4184f;
-//static const float surfaceAreaFactor        =   -6.0f * 0.06786f * forceConversionFactor * 1000.0f;  // PI * 4.0f * 0.0049f * 1000.0f;
-//static const float surfaceAreaFactor        =   -6.0f * PI * 4.0f * 0.0049f * 1000.0f;
-static const float surfaceAreaFactor        = -6.0f*PI*0.0216f*1000.0f*0.4184f;
-//static const float surfaceAreaFactor        = -1.7035573959e+001;
-//static const float surfaceAreaFactor        = -166.03185f;
-//static const float surfaceAreaFactor        = 1.0f;
-static const float alphaOBC                 =    1.0f;
-static const float betaOBC                  =    0.8f;
-static const float gammaOBC                 =    4.85f;
-static const float kcalMolTokJNM            =   -0.4184f;
-static const float electricConstant         = -166.03185f;
-static const float defaultInnerDielectric   =    1.0f;
-static const float defaultSolventDielectric =   78.3f;
-static const float KILO                     =    1e3;                      // Thousand
-static const float BOLTZMANN                =    1.380658e-23f;            // (J/K)    
-static const float AVOGADRO                 =    6.0221367e23f;            // ()        
-static const float RGAS                     =    BOLTZMANN * AVOGADRO;     // (J/(mol K))
-static const float BOLTZ                    =    (RGAS / KILO);            // (kJ/(mol K)) 
-#define DUMP_PARAMETERS 0
-template <int SIZE>
-static Expression<SIZE> createExpression(gpuContext gpu, const string& expression, const Lepton::ExpressionProgram& program, const vector<string>& variables,
-        const vector<string>& globalParamNames, unsigned int& maxStackSize) {
-    Expression<SIZE> exp;
-    if (program.getNumOperations() > SIZE)
-        throw OpenMMException("Expression contains too many operations: "+expression);
-    exp.length = program.getNumOperations();
-    exp.stackSize = program.getStackSize();
-    if (exp.stackSize > (int) maxStackSize)
-        maxStackSize = exp.stackSize;
-    for (int i = 0; i < program.getNumOperations(); i++) {
-        const Operation& op = program.getOperation(i);
-        switch (op.getId()) {
-            case Operation::CONSTANT:
-                exp.op[i] = CONSTANT;
-                exp.arg[i] = (float) dynamic_cast<const Operation::Constant*>(&op)->getValue();
-                break;
-            case Operation::VARIABLE:
-                if (variables.size() > 0 && op.getName() == variables[0])
-                    exp.op[i] = VARIABLE0;
-                else if (variables.size() > 1 && op.getName() == variables[1])
-                    exp.op[i] = VARIABLE1;
-                else if (variables.size() > 2 && op.getName() == variables[2])
-                    exp.op[i] = VARIABLE2;
-                else if (variables.size() > 3 && op.getName() == variables[3])
-                    exp.op[i] = VARIABLE3;
-                else if (variables.size() > 4 && op.getName() == variables[4])
-                    exp.op[i] = VARIABLE4;
-                else if (variables.size() > 5 && op.getName() == variables[5])
-                    exp.op[i] = VARIABLE5;
-                else if (variables.size() > 6 && op.getName() == variables[6])
-                    exp.op[i] = VARIABLE6;
-                else if (variables.size() > 7 && op.getName() == variables[7])
-                    exp.op[i] = VARIABLE7;
-                else if (variables.size() > 8 && op.getName() == variables[8])
-                    exp.op[i] = VARIABLE8;
-                else {
-                    int j;
-                    for (j = 0; j < (int) globalParamNames.size() && op.getName() != globalParamNames[j]; j++);
-                    if (j == globalParamNames.size())
-                        throw OpenMMException("Unknown variable '"+op.getName()+"' in expression: "+expression);
-                    exp.op[i] = GLOBAL;
-                    exp.arg[i] = (float) j;
-                }
-                break;
-            case Operation::CUSTOM:
-                exp.op[i] = dynamic_cast<const Operation::Custom*>(&op)->getDerivOrder()[0] == 0 ? CUSTOM : CUSTOM_DERIV;
-                for (int j = 0; j < MAX_TABULATED_FUNCTIONS; j++)
-                    if (op.getName() == gpu->tabulatedFunctions[j].name) {
-                        exp.arg[i] = (float) j;
-                        break;
-                    }
-                break;
-            case Operation::ADD:
-                exp.op[i] = ADD;
-                break;
-            case Operation::SUBTRACT:
-                exp.op[i] = SUBTRACT;
-                break;
-            case Operation::MULTIPLY:
-                exp.op[i] = MULTIPLY;
-                break;
-            case Operation::DIVIDE:
-                exp.op[i] = DIVIDE;
-                break;
-            case Operation::POWER:
-                exp.op[i] = POWER;
-                break;
-            case Operation::NEGATE:
-                exp.op[i] = NEGATE;
-                break;
-            case Operation::SQRT:
-                exp.op[i] = SQRT;
-                break;
-            case Operation::EXP:
-                exp.op[i] = EXP;
-                break;
-            case Operation::LOG:
-                exp.op[i] = LOG;
-                break;
-            case Operation::SIN:
-                exp.op[i] = SIN;
-                break;
-            case Operation::COS:
-                exp.op[i] = COS;
-                break;
-            case Operation::SEC:
-                exp.op[i] = SEC;
-                break;
-            case Operation::CSC:
-                exp.op[i] = CSC;
-                break;
-            case Operation::TAN:
-                exp.op[i] = TAN;
-                break;
-            case Operation::COT:
-                exp.op[i] = COT;
-                break;
-            case Operation::ASIN:
-                exp.op[i] = ASIN;
-                break;
-            case Operation::ACOS:
-                exp.op[i] = ACOS;
-                break;
-            case Operation::ATAN:
-                exp.op[i] = ATAN;
-                break;
-            case Operation::SINH:
-                exp.op[i] = SINH;
-                break;
-            case Operation::COSH:
-                exp.op[i] = COSH;
-                break;
-            case Operation::TANH:
-                exp.op[i] = TANH;
-                break;
-            case Operation::ERF:
-                exp.op[i] = ERF;
-                break;
-            case Operation::ERFC:
-                exp.op[i] = ERFC;
-                break;
-            case Operation::STEP:
-                exp.op[i] = STEP;
-                break;
-            case Operation::SQUARE:
-                exp.op[i] = SQUARE;
-                break;
-            case Operation::CUBE:
-                exp.op[i] = CUBE;
-                break;
-            case Operation::RECIPROCAL:
-                exp.op[i] = RECIPROCAL;
-                break;
-            case Operation::ADD_CONSTANT:
-                exp.op[i] = ADD_CONSTANT;
-                exp.arg[i] = (float) dynamic_cast<const Operation::AddConstant*>(&op)->getValue();
-                break;
-            case Operation::MULTIPLY_CONSTANT:
-                exp.op[i] = MULTIPLY_CONSTANT;
-                exp.arg[i] = (float) dynamic_cast<const Operation::MultiplyConstant*>(&op)->getValue();
-                break;
-            case Operation::POWER_CONSTANT:
-                exp.op[i] = POWER_CONSTANT;
-                exp.arg[i] = (float) dynamic_cast<const Operation::PowerConstant*>(&op)->getValue();
-                break;
-            case Operation::MIN:
-                exp.op[i] = MIN;
-                break;
-            case Operation::MAX:
-                exp.op[i] = MAX;
-                break;
-            case Operation::ABS:
-                exp.op[i] = ABS;
-                break;
-        }
-    }
-    return exp;
-}
-extern "C"
-void gpuSetBondParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<float>& length, const vector<float>& k)
-{
-    int bonds = atom1.size();
-    gpu->sim.bonds                              = bonds;
-    CUDAStream<int4>* psBondID                  = new CUDAStream<int4>(bonds, 1, "BondID");
-    gpu->psBondID                               = psBondID;
-    gpu->sim.pBondID                            = psBondID->_pDevStream[0];
-    CUDAStream<float2>* psBondParameter         = new CUDAStream<float2>(bonds, 1, "BondParameter");
-    gpu->psBondParameter                        = psBondParameter;
-    gpu->sim.pBondParameter                     = psBondParameter->_pDevStream[0];
-    for (int i = 0; i < bonds; i++)
-    {
-        (*psBondID)[i].x = atom1[i];
-        (*psBondID)[i].y = atom2[i];
-        (*psBondParameter)[i].x = length[i];
-        (*psBondParameter)[i].y = k[i];
-        psBondID->_pSysData[i].z = gpu->pOutputBufferCounter[psBondID->_pSysData[i].x]++;
-        psBondID->_pSysData[i].w = gpu->pOutputBufferCounter[psBondID->_pSysData[i].y]++;
-#if (DUMP_PARAMETERS == 1)                
-        cout << 
-            i << " " << 
-            (*psBondID)[i].x << " " <<
-            (*psBondID)[i].y << " " <<
-            (*psBondID)[i].z << " " <<
-            (*psBondID)[i].w << " " <<
-            (*psBondParameter)[i].x << " " <<
-            (*psBondParameter)[i].y <<
-            endl;
-#endif
-    }
-    psBondID->Upload();
-    psBondParameter->Upload();
-}
-extern "C"
-void gpuSetBondAngleParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3,
-        const vector<float>& angle, const vector<float>& k)
-{
-    int bond_angles = atom1.size();
-    gpu->sim.bond_angles                        = bond_angles;
-    CUDAStream<int4>* psBondAngleID1            = new CUDAStream<int4>(bond_angles, 1, "BondAngleID1");
-    gpu->psBondAngleID1                         = psBondAngleID1;
-    gpu->sim.pBondAngleID1                      = psBondAngleID1->_pDevStream[0];
-    CUDAStream<int2>* psBondAngleID2            = new CUDAStream<int2>(bond_angles, 1, "BondAngleID2");
-    gpu->psBondAngleID2                         = psBondAngleID2;
-    gpu->sim.pBondAngleID2                      = psBondAngleID2->_pDevStream[0];
-    CUDAStream<float2>* psBondAngleParameter    = new CUDAStream<float2>(bond_angles, 1, "BondAngleParameter");
-    gpu->psBondAngleParameter                   = psBondAngleParameter;
-    gpu->sim.pBondAngleParameter                = psBondAngleParameter->_pDevStream[0];        
-    for (int i = 0; i < bond_angles; i++)
-    {
-        (*psBondAngleID1)[i].x = atom1[i];
-        (*psBondAngleID1)[i].y = atom2[i];
-        (*psBondAngleID1)[i].z = atom3[i];
-        (*psBondAngleParameter)[i].x = angle[i];
-        (*psBondAngleParameter)[i].y = k[i];
-        psBondAngleID1->_pSysData[i].w = gpu->pOutputBufferCounter[psBondAngleID1->_pSysData[i].x]++;
-        psBondAngleID2->_pSysData[i].x = gpu->pOutputBufferCounter[psBondAngleID1->_pSysData[i].y]++;
-        psBondAngleID2->_pSysData[i].y = gpu->pOutputBufferCounter[psBondAngleID1->_pSysData[i].z]++;
-#if (DUMP_PARAMETERS == 1)
-         cout << 
-            i << " " << 
-            (*psBondAngleID1)[i].x << " " <<
-            (*psBondAngleID1)[i].y << " " <<
-            (*psBondAngleID1)[i].z << " " <<
-            (*psBondAngleID1)[i].w << " " <<
-            (*psBondAngleID2)[i].x << " " <<
-            (*psBondAngleID2)[i].y << " " <<
-            (*psBondAngleParameter)[i].x << " " <<
-            (*psBondAngleParameter)[i].y <<
-            endl;
-#endif
-    }
-    psBondAngleID1->Upload();
-    psBondAngleID2->Upload();
-    psBondAngleParameter->Upload();
-}
-extern "C"
-void gpuSetDihedralParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3, const vector<int>& atom4,
-        const vector<float>& k, const vector<float>& phase, const vector<int>& periodicity)
-{
-        int dihedrals = atom1.size();
-        gpu->sim.dihedrals = dihedrals;
-        CUDAStream<int4>* psDihedralID1             = new CUDAStream<int4>(dihedrals, 1, "DihedralID1");
-        gpu->psDihedralID1                          = psDihedralID1;
-        gpu->sim.pDihedralID1                       = psDihedralID1->_pDevStream[0];
-        CUDAStream<int4>* psDihedralID2             = new CUDAStream<int4>(dihedrals, 1, "DihedralID2");
-        gpu->psDihedralID2                          = psDihedralID2;
-        gpu->sim.pDihedralID2                       = psDihedralID2->_pDevStream[0];
-        CUDAStream<float4>* psDihedralParameter     = new CUDAStream<float4>(dihedrals, 1, "DihedralParameter");
-        gpu->psDihedralParameter                    = psDihedralParameter;
-        gpu->sim.pDihedralParameter                 = psDihedralParameter->_pDevStream[0];
-        for (int i = 0; i < dihedrals; i++)
-        {
-            (*psDihedralID1)[i].x = atom1[i];
-            (*psDihedralID1)[i].y = atom2[i];
-            (*psDihedralID1)[i].z = atom3[i];
-            (*psDihedralID1)[i].w = atom4[i];
-            (*psDihedralParameter)[i].x = k[i];
-            (*psDihedralParameter)[i].y = phase[i];
-            (*psDihedralParameter)[i].z = (float) periodicity[i];
-            psDihedralID2->_pSysData[i].x = gpu->pOutputBufferCounter[psDihedralID1->_pSysData[i].x]++;
-            psDihedralID2->_pSysData[i].y = gpu->pOutputBufferCounter[psDihedralID1->_pSysData[i].y]++;
-            psDihedralID2->_pSysData[i].z = gpu->pOutputBufferCounter[psDihedralID1->_pSysData[i].z]++;
-            psDihedralID2->_pSysData[i].w = gpu->pOutputBufferCounter[psDihedralID1->_pSysData[i].w]++;
-#if (DUMP_PARAMETERS == 1)
-            cout << 
-                i << " " << 
-                (*psDihedralID1)[i].x << " " <<
-                (*psDihedralID1)[i].y << " " <<
-                (*psDihedralID1)[i].z << " " <<
-                (*psDihedralID1)[i].w << " " <<
-                (*psDihedralID2)[i].x << " " <<
-                (*psDihedralID2)[i].y << " " <<
-                (*psDihedralID2)[i].z << " " <<
-                (*psDihedralID2)[i].w << " " <<
-                (*psDihedralParameter)[i].x << " " <<
-                (*psDihedralParameter)[i].y << " " <<
-                (*psDihedralParameter)[i].z << endl;
-#endif
-        }
-        psDihedralID1->Upload();
-        psDihedralID2->Upload();
-        psDihedralParameter->Upload();
-}
-extern "C"
-void gpuSetRbDihedralParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<int>& atom3, const vector<int>& atom4,
-        const vector<float>& c0, const vector<float>& c1, const vector<float>& c2, const vector<float>& c3, const vector<float>& c4, const vector<float>& c5)
-{
-    int rb_dihedrals = atom1.size();
-    gpu->sim.rb_dihedrals = rb_dihedrals;
-    CUDAStream<int4>* psRbDihedralID1           = new CUDAStream<int4>(rb_dihedrals, 1, "RbDihedralID1");
-    gpu->psRbDihedralID1                        = psRbDihedralID1;
-    gpu->sim.pRbDihedralID1                     = psRbDihedralID1->_pDevStream[0];
-    CUDAStream<int4>* psRbDihedralID2           = new CUDAStream<int4>(rb_dihedrals, 1, "RbDihedralID2");
-    gpu->psRbDihedralID2                        = psRbDihedralID2;
-    gpu->sim.pRbDihedralID2                     = psRbDihedralID2->_pDevStream[0];
-    CUDAStream<float4>* psRbDihedralParameter1  = new CUDAStream<float4>(rb_dihedrals, 1, "RbDihedralParameter1");
-    gpu->psRbDihedralParameter1                 = psRbDihedralParameter1;
-    gpu->sim.pRbDihedralParameter1              = psRbDihedralParameter1->_pDevStream[0];
-    CUDAStream<float2>* psRbDihedralParameter2  = new CUDAStream<float2>(rb_dihedrals, 1, "RbDihedralParameter2");
-    gpu->psRbDihedralParameter2                 = psRbDihedralParameter2;
-    gpu->sim.pRbDihedralParameter2              = psRbDihedralParameter2->_pDevStream[0];
-    for (int i = 0; i < rb_dihedrals; i++)
-    {
-        (*psRbDihedralID1)[i].x = atom1[i];
-        (*psRbDihedralID1)[i].y = atom2[i];
-        (*psRbDihedralID1)[i].z = atom3[i];
-        (*psRbDihedralID1)[i].w = atom4[i];
-        (*psRbDihedralParameter1)[i].x = c0[i];
-        (*psRbDihedralParameter1)[i].y = c1[i];
-        (*psRbDihedralParameter1)[i].z = c2[i];
-        (*psRbDihedralParameter1)[i].w = c3[i];
-        (*psRbDihedralParameter2)[i].x = c4[i];
-        (*psRbDihedralParameter2)[i].y = c5[i];
-        psRbDihedralID2->_pSysData[i].x = gpu->pOutputBufferCounter[psRbDihedralID1->_pSysData[i].x]++;
-        psRbDihedralID2->_pSysData[i].y = gpu->pOutputBufferCounter[psRbDihedralID1->_pSysData[i].y]++;
-        psRbDihedralID2->_pSysData[i].z = gpu->pOutputBufferCounter[psRbDihedralID1->_pSysData[i].z]++;
-        psRbDihedralID2->_pSysData[i].w = gpu->pOutputBufferCounter[psRbDihedralID1->_pSysData[i].w]++;
-#if (DUMP_PARAMETERS == 1)
-        cout << 
-            i << " " << 
-            (*psRbDihedralID1)[i].x << " " <<
-            (*psRbDihedralID1)[i].y << " " <<
-            (*psRbDihedralID1)[i].z << " " <<
-            (*psRbDihedralID1)[i].w <<" " <<
-            (*psRbDihedralID2)[i].x << " " <<
-            (*psRbDihedralID2)[i].y << " " <<
-            (*psRbDihedralID2)[i].z << " " <<
-            (*psRbDihedralID2)[i].w <<" " <<
-            (*psRbDihedralParameter1)[i].x << " " <<
-            (*psRbDihedralParameter1)[i].y << " " <<
-            (*psRbDihedralParameter1)[i].z << " " <<
-            (*psRbDihedralParameter1)[i].w << " " <<
-            (*psRbDihedralParameter2)[i].x << " " <<
-            (*psRbDihedralParameter2)[i].y <<
-            endl;
-#endif
-    }
-    psRbDihedralID1->Upload();
-    psRbDihedralID2->Upload();
-    psRbDihedralParameter1->Upload();
-    psRbDihedralParameter2->Upload();
-}
-extern "C"
-void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const vector<int>& atom1, const vector<int>& atom2,
-        const vector<float>& c6, const vector<float>& c12, const vector<float>& q1, const vector<float>& q2)
-{
-    int LJ14s = atom1.size();
-    float scale = epsfac * fudge;
-    gpu->sim.LJ14s                              = LJ14s;
-    CUDAStream<int4>* psLJ14ID                  = new CUDAStream<int4>(LJ14s, 1, "LJ14ID");
-    gpu->psLJ14ID                               = psLJ14ID;
-    gpu->sim.pLJ14ID                            = psLJ14ID->_pDevStream[0];
-    CUDAStream<float4>* psLJ14Parameter         = new CUDAStream<float4>(LJ14s, 1, "LJ14Parameter");
-    gpu->psLJ14Parameter                        = psLJ14Parameter;
-    gpu->sim.pLJ14Parameter                     = psLJ14Parameter->_pDevStream[0];
-    for (int i = 0; i < LJ14s; i++)
-    {
-        (*psLJ14ID)[i].x = atom1[i];
-        (*psLJ14ID)[i].y = atom2[i];
-        psLJ14ID->_pSysData[i].z = gpu->pOutputBufferCounter[psLJ14ID->_pSysData[i].x]++;
-        psLJ14ID->_pSysData[i].w = gpu->pOutputBufferCounter[psLJ14ID->_pSysData[i].y]++;
-        float p0, p1, p2;
-        if (c12[i] == 0.0f)
-        {
-            p0 = 0.0f;
-            p1 = 1.0f;
-        }
-        else
-        {
-            p0 = c6[i] * c6[i] / c12[i];
-            p1 = pow(c12[i] / c6[i], 1.0f / 6.0f);
-        }
-        p2 = scale * q1[i] * q2[i];
-        (*psLJ14Parameter)[i].x = p0;
-        (*psLJ14Parameter)[i].y = p1;
-        (*psLJ14Parameter)[i].z = p2;
-    }
-#if (DUMP_PARAMETERS == 1)
-        cout << 
-            i << " " <<
-            (*psLJ14ID)[i].x << " " <<
-            (*psLJ14ID)[i].y << " " <<
-            (*psLJ14ID)[i].z << " " <<
-            (*psLJ14ID)[i].w << " " <<
-            (*psLJ14Parameter)[i].x << " " <<
-            (*psLJ14Parameter)[i].y << " " <<
-            (*psLJ14Parameter)[i].z << " " <<
-            p0 << " " << 
-            p1 << " " << 
-            p2 << " " << 
-            endl;
-#endif
-    psLJ14ID->Upload();
-    psLJ14Parameter->Upload();
-}
-extern "C" void setExclusions(gpuContext gpu, const vector<vector<int> >& exclusions) {
-    if (gpu->exclusions.size() > 0) {
-        bool ok = (exclusions.size() == gpu->exclusions.size());
-        for (int i = 0; i < (int) exclusions.size() && ok; i++) {
-            if (exclusions[i].size() != gpu->exclusions[i].size())
-                ok = false;
-            else {
-                for (int j = 0; j < (int) exclusions[i].size(); j++)
-                    if (find(gpu->exclusions[i].begin(), gpu->exclusions[i].end(), exclusions[i][j]) == gpu->exclusions[i].end())
-                        ok = false;
-            }
-        }
-        if (!ok)
-            throw OpenMMException("All nonbonded forces must have identical sets of exceptions");
-    }
-    gpu->exclusions = exclusions;
-}
-extern "C"
-void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const vector<int>& atom, const vector<float>& c6, const vector<float>& c12, const vector<float>& q,
-        const vector<char>& symbol, const vector<vector<int> >& exclusions, CudaNonbondedMethod method)
-{
-    unsigned int coulombs = c6.size();
-    gpu->sim.epsfac = epsfac;
-    gpu->sim.nonbondedMethod = method;
-    if (coulombs > 0)
-        setExclusions(gpu, exclusions);
-    for (unsigned int i = 0; i < coulombs; i++)
-    {
-            float p0 = q[i];
-            float p1 = 0.5f, p2 = 0.0f;               
-            if ((c6[i] > 0.0f) && (c12[i] > 0.0f))
-            {
-                p1 = 0.5f * pow(c12[i] / c6[i], 1.0f / 6.0f);
-                p2 = c6[i] * sqrt(1.0f / c12[i]);
-            }
-            if (symbol.size() > 0)
-                gpu->pAtomSymbol[i] = symbol[i];
-            (*gpu->psPosq4)[i].w = p0;
-            (*gpu->psSigEps2)[i].x = p1;
-            (*gpu->psSigEps2)[i].y = p2;
-    }
-    // Dummy out extra atom data
-    for (unsigned int i = gpu->natoms; i < gpu->sim.paddedNumberOfAtoms; i++)
-    {
-        (*gpu->psPosq4)[i].x       = 100000.0f + i * 10.0f;
-        (*gpu->psPosq4)[i].y       = 100000.0f + i * 10.0f;
-        (*gpu->psPosq4)[i].z       = 100000.0f + i * 10.0f;
-        (*gpu->psPosq4)[i].w       = 0.0f;
-        (*gpu->psSigEps2)[i].x     = 0.0f;
-        (*gpu->psSigEps2)[i].y     = 0.0f;
-    }
-    gpu->psPosq4->Upload();
-    gpu->psSigEps2->Upload();
-}
-extern "C"
-void gpuSetNonbondedCutoff(gpuContext gpu, float cutoffDistance, float solventDielectric)
-{
-    if (gpu->sim.nonbondedCutoff != 0.0f && gpu->sim.nonbondedCutoff != cutoffDistance)
-        throw OpenMMException("All nonbonded forces must use the same cutoff");
-    gpu->sim.nonbondedCutoff = cutoffDistance;
-    gpu->sim.nonbondedCutoffSqr = cutoffDistance*cutoffDistance;
-    gpu->sim.reactionFieldK = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0f)/(2.0f*solventDielectric+1.0f);
-    gpu->sim.reactionFieldC = (1.0f / cutoffDistance)*(3.0f*solventDielectric)/(2.0f*solventDielectric+1.0f);
-}
-extern "C"
-void gpuSetTabulatedFunction(gpuContext gpu, int index, const string& name, const vector<double>& values, double min, double max)
-{
-    if (index < 0 || index >= MAX_TABULATED_FUNCTIONS) {
-        stringstream str;
-        str << "Only " << MAX_TABULATED_FUNCTIONS << " tabulated functions are supported";
-        throw OpenMMException(str.str());
-    }
-    if (gpu->tabulatedFunctions[index].coefficients != NULL)
-        delete gpu->tabulatedFunctions[index].coefficients;
-    CUDAStream<float4>* coeff = new CUDAStream<float4>((int) values.size()-1, 1, "TabulatedFunction");
-    gpu->tabulatedFunctions[index].coefficients = coeff;
-    gpu->sim.pTabulatedFunctionCoefficients[index] = coeff->_pDevData;
-    gpu->tabulatedFunctions[index].name = name;
-    gpu->tabulatedFunctions[index].min = min;
-    gpu->tabulatedFunctions[index].max = max;
-    gpu->tabulatedFunctionsChanged = true;
-    // Compute the spline coefficients.
-    int numValues = values.size();
-    vector<double> x(numValues), derivs;
-    for (int i = 0; i < numValues; i++)
-        x[i] = min+i*(max-min)/(numValues-1);
-    OpenMM::SplineFitter::createNaturalSpline(x, values, derivs);
-    for (int i = 0; i < (int) values.size()-1; i++)
-        (*coeff)[i] = make_float4((float) values[i], (float) values[i+1], (float) (derivs[i]/6.0), (float) (derivs[i+1]/6.0));
-    coeff->Upload();
-}
-// Uploads the bond list and per-bond parameters of a CustomBondForce and builds its energy and
-// force expressions; the force expression is energyExp differentiated with respect to "r".
-// For every bond, the current value of a per-atom running counter is stored for each of its two
-// atoms, and gpu->pOutputBufferCounter is raised to at least the final counter of each atom.
-// Throws OpenMMException for more than four per-bond parameters, more than eight global
-// parameters, or when custom bond data has already been set on this context.
-extern "C"
-void gpuSetCustomBondParameters(gpuContext gpu, const vector<int>& bondAtom1, const vector<int>& bondAtom2, const vector<vector<double> >& bondParams,
-            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
-{
-    // Reject configurations this platform does not support.
-    if (paramNames.size() > 4)
-        throw OpenMMException("CudaPlatform only supports four per-bond parameters for custom bond forces");
-    if (globalParamNames.size() > 8)
-        throw OpenMMException("CudaPlatform only supports eight global parameters for custom bond forces");
-    if (gpu->psCustomBondID != NULL)
-        throw OpenMMException("CudaPlatform only supports a single CustomBondForce per System");
-    // Allocate the host/device streams.
-    gpu->sim.customBonds = bondAtom1.size();
-    gpu->sim.customBondParameters = paramNames.size();
-    gpu->psCustomBondID = new CUDAStream<int4>(gpu->sim.customBonds, 1, "CustomBondId");
-    gpu->sim.pCustomBondID = gpu->psCustomBondID->_pDevData;
-    gpu->psCustomBondParams = new CUDAStream<float4>(gpu->sim.customBonds, 1, "CustomBondParams");
-    gpu->sim.pCustomBondParams = gpu->psCustomBondParams->_pDevData;
-    // Fill in one record per bond.
-    const int numBonds = (int) bondAtom1.size();
-    vector<int> bufferSlots(gpu->natoms, 0);
-    for (int bond = 0; bond < numBonds; bond++) {
-        int4& id = (*gpu->psCustomBondID)[bond];
-        id.x = bondAtom1[bond];
-        id.y = bondAtom2[bond];
-        id.z = bufferSlots[id.x]++;
-        id.w = bufferSlots[id.y]++;
-        // Only the components that have a matching parameter are written.
-        const vector<double>& values = bondParams[bond];
-        const size_t numValues = values.size();
-        float4& packed = (*gpu->psCustomBondParams)[bond];
-        if (numValues > 0)
-            packed.x = (float) values[0];
-        if (numValues > 1)
-            packed.y = (float) values[1];
-        if (numValues > 2)
-            packed.z = (float) values[2];
-        if (numValues > 3)
-            packed.w = (float) values[3];
-    }
-    gpu->psCustomBondID->Upload();
-    gpu->psCustomBondParams->Upload();
-    // Raise the shared per-atom counters wherever this force needs more than is recorded so far.
-    const int numCounters = (int) bufferSlots.size();
-    for (int atom = 0; atom < numCounters; atom++) {
-        if ((int) gpu->pOutputBufferCounter[atom] < bufferSlots[atom])
-            gpu->pOutputBufferCounter[atom] = bufferSlots[atom];
-    }
-    // Create the Expressions: the variable list is "r" followed by the per-bond parameter names.
-    vector<string> variables(1, "r");
-    variables.insert(variables.end(), paramNames.begin(), paramNames.end());
-    SetCustomBondEnergyExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-    SetCustomBondForceExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("r").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-}
-// Uploads the angle list and per-angle parameters of a CustomAngleForce and builds its energy and
-// force expressions; the force expression is energyExp differentiated with respect to "theta".
-// CustomAngleId1 holds the three atom indices plus the running counter value of the first atom;
-// CustomAngleId2 holds the counter values of the second and third atoms.
-// Throws OpenMMException for more than four per-angle parameters, more than eight global
-// parameters, or when custom angle data has already been set on this context.
-extern "C"
-void gpuSetCustomAngleParameters(gpuContext gpu, const vector<int>& angleAtom1, const vector<int>& angleAtom2, const vector<int>& angleAtom3, const vector<vector<double> >& angleParams,
-            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
-{
-    // Reject configurations this platform does not support.
-    if (paramNames.size() > 4)
-        throw OpenMMException("CudaPlatform only supports four per-angle parameters for custom angle forces");
-    if (globalParamNames.size() > 8)
-        throw OpenMMException("CudaPlatform only supports eight global parameters for custom angle forces");
-    if (gpu->psCustomAngleID1 != NULL)
-        throw OpenMMException("CudaPlatform only supports a single CustomAngleForce per System");
-    // Allocate the host/device streams.
-    gpu->sim.customAngles = angleAtom1.size();
-    gpu->sim.customAngleParameters = paramNames.size();
-    gpu->psCustomAngleID1 = new CUDAStream<int4>(gpu->sim.customAngles, 1, "CustomAngleId1");
-    gpu->sim.pCustomAngleID1 = gpu->psCustomAngleID1->_pDevData;
-    gpu->psCustomAngleID2 = new CUDAStream<int2>(gpu->sim.customAngles, 1, "CustomAngleId2");
-    gpu->sim.pCustomAngleID2 = gpu->psCustomAngleID2->_pDevData;
-    gpu->psCustomAngleParams = new CUDAStream<float4>(gpu->sim.customAngles, 1, "CustomAngleParams");
-    gpu->sim.pCustomAngleParams = gpu->psCustomAngleParams->_pDevData;
-    // Fill in one record per angle.
-    const int numAngles = (int) angleAtom1.size();
-    vector<int> bufferSlots(gpu->natoms, 0);
-    for (int angle = 0; angle < numAngles; angle++) {
-        int4& atoms = (*gpu->psCustomAngleID1)[angle];
-        int2& slots = (*gpu->psCustomAngleID2)[angle];
-        atoms.x = angleAtom1[angle];
-        atoms.y = angleAtom2[angle];
-        atoms.z = angleAtom3[angle];
-        atoms.w = bufferSlots[atoms.x]++;
-        slots.x = bufferSlots[atoms.y]++;
-        slots.y = bufferSlots[atoms.z]++;
-        // Only the components that have a matching parameter are written.
-        const vector<double>& values = angleParams[angle];
-        const size_t numValues = values.size();
-        float4& packed = (*gpu->psCustomAngleParams)[angle];
-        if (numValues > 0)
-            packed.x = (float) values[0];
-        if (numValues > 1)
-            packed.y = (float) values[1];
-        if (numValues > 2)
-            packed.z = (float) values[2];
-        if (numValues > 3)
-            packed.w = (float) values[3];
-    }
-    gpu->psCustomAngleID1->Upload();
-    gpu->psCustomAngleID2->Upload();
-    gpu->psCustomAngleParams->Upload();
-    // Raise the shared per-atom counters wherever this force needs more than is recorded so far.
-    const int numCounters = (int) bufferSlots.size();
-    for (int atom = 0; atom < numCounters; atom++) {
-        if ((int) gpu->pOutputBufferCounter[atom] < bufferSlots[atom])
-            gpu->pOutputBufferCounter[atom] = bufferSlots[atom];
-    }
-    // Create the Expressions: the variable list is "theta" followed by the per-angle parameter names.
-    vector<string> variables(1, "theta");
-    variables.insert(variables.end(), paramNames.begin(), paramNames.end());
-    SetCustomAngleEnergyExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-    SetCustomAngleForceExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("theta").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-}
-// Uploads the torsion list and per-torsion parameters of a CustomTorsionForce and builds its
-// energy and force expressions; the force expression is energyExp differentiated with respect
-// to "theta".  CustomTorsionId1 holds the four atom indices; CustomTorsionId2 holds, for the same
-// four atoms in order, the value of a per-atom running counter at the time the torsion was added.
-// Throws OpenMMException for more than four per-torsion parameters, more than eight global
-// parameters, or when custom torsion data has already been set on this context.
-extern "C"
-void gpuSetCustomTorsionParameters(gpuContext gpu, const vector<int>& torsionAtom1, const vector<int>& torsionAtom2, const vector<int>& torsionAtom3, const vector<int>& torsionAtom4, const vector<vector<double> >& torsionParams,
-            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
-{
-    // Reject configurations this platform does not support.
-    if (paramNames.size() > 4)
-        throw OpenMMException("CudaPlatform only supports four per-torsion parameters for custom torsion forces");
-    if (globalParamNames.size() > 8)
-        throw OpenMMException("CudaPlatform only supports eight global parameters for custom torsion forces");
-    if (gpu->psCustomTorsionID1 != NULL)
-        throw OpenMMException("CudaPlatform only supports a single CustomTorsionForce per System");
-    // Allocate the host/device streams.
-    gpu->sim.customTorsions = torsionAtom1.size();
-    gpu->sim.customTorsionParameters = paramNames.size();
-    gpu->psCustomTorsionID1 = new CUDAStream<int4>(gpu->sim.customTorsions, 1, "CustomTorsionId1");
-    gpu->sim.pCustomTorsionID1 = gpu->psCustomTorsionID1->_pDevData;
-    gpu->psCustomTorsionID2 = new CUDAStream<int4>(gpu->sim.customTorsions, 1, "CustomTorsionId2");
-    gpu->sim.pCustomTorsionID2 = gpu->psCustomTorsionID2->_pDevData;
-    gpu->psCustomTorsionParams = new CUDAStream<float4>(gpu->sim.customTorsions, 1, "CustomTorsionParams");
-    gpu->sim.pCustomTorsionParams = gpu->psCustomTorsionParams->_pDevData;
-    // Fill in one record per torsion.
-    const int numTorsions = (int) torsionAtom1.size();
-    vector<int> bufferSlots(gpu->natoms, 0);
-    for (int torsion = 0; torsion < numTorsions; torsion++) {
-        int4& atoms = (*gpu->psCustomTorsionID1)[torsion];
-        int4& slots = (*gpu->psCustomTorsionID2)[torsion];
-        atoms.x = torsionAtom1[torsion];
-        atoms.y = torsionAtom2[torsion];
-        atoms.z = torsionAtom3[torsion];
-        atoms.w = torsionAtom4[torsion];
-        slots.x = bufferSlots[atoms.x]++;
-        slots.y = bufferSlots[atoms.y]++;
-        slots.z = bufferSlots[atoms.z]++;
-        slots.w = bufferSlots[atoms.w]++;
-        // Only the components that have a matching parameter are written.
-        const vector<double>& values = torsionParams[torsion];
-        const size_t numValues = values.size();
-        float4& packed = (*gpu->psCustomTorsionParams)[torsion];
-        if (numValues > 0)
-            packed.x = (float) values[0];
-        if (numValues > 1)
-            packed.y = (float) values[1];
-        if (numValues > 2)
-            packed.z = (float) values[2];
-        if (numValues > 3)
-            packed.w = (float) values[3];
-    }
-    gpu->psCustomTorsionID1->Upload();
-    gpu->psCustomTorsionID2->Upload();
-    gpu->psCustomTorsionParams->Upload();
-    // Raise the shared per-atom counters wherever this force needs more than is recorded so far.
-    const int numCounters = (int) bufferSlots.size();
-    for (int atom = 0; atom < numCounters; atom++) {
-        if ((int) gpu->pOutputBufferCounter[atom] < bufferSlots[atom])
-            gpu->pOutputBufferCounter[atom] = bufferSlots[atom];
-    }
-    // Create the Expressions: the variable list is "theta" followed by the per-torsion parameter names.
-    vector<string> variables(1, "theta");
-    variables.insert(variables.end(), paramNames.begin(), paramNames.end());
-    SetCustomTorsionEnergyExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-    SetCustomTorsionForceExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("theta").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-}
-// Uploads the atom list and per-atom parameters of a CustomExternalForce and builds its energy
-// expression plus three force expressions, obtained by differentiating energyExp with respect to
-// "x", "y" and "z" in turn.
-// Throws OpenMMException for more than four per-particle parameters, more than eight global
-// parameters, or when custom external data has already been set on this context.
-extern "C"
-void gpuSetCustomExternalParameters(gpuContext gpu, const vector<int>& atomIndex, const vector<vector<double> >& atomParams,
-            const string& energyExp, const vector<string>& paramNames, const vector<string>& globalParamNames)
-{
-    // Reject configurations this platform does not support.
-    if (paramNames.size() > 4)
-        throw OpenMMException("CudaPlatform only supports four per-particle parameters for custom external forces");
-    if (globalParamNames.size() > 8)
-        throw OpenMMException("CudaPlatform only supports eight global parameters for custom external forces");
-    if (gpu->psCustomExternalID != NULL)
-        throw OpenMMException("CudaPlatform only supports a single CustomExternalForce per System");
-    // Allocate the host/device streams.
-    gpu->sim.customExternals = atomIndex.size();
-    gpu->sim.customExternalParameters = paramNames.size();
-    gpu->psCustomExternalID = new CUDAStream<int>(gpu->sim.customExternals, 1, "CustomExternalId");
-    gpu->sim.pCustomExternalID = gpu->psCustomExternalID->_pDevData;
-    gpu->psCustomExternalParams = new CUDAStream<float4>(gpu->sim.customExternals, 1, "CustomExternalParams");
-    gpu->sim.pCustomExternalParams = gpu->psCustomExternalParams->_pDevData;
-    // Fill in one record per affected atom.
-    const int numEntries = (int) atomIndex.size();
-    for (int entry = 0; entry < numEntries; entry++) {
-        (*gpu->psCustomExternalID)[entry] = atomIndex[entry];
-        // Only the components that have a matching parameter are written.
-        const vector<double>& values = atomParams[entry];
-        const size_t numValues = values.size();
-        float4& packed = (*gpu->psCustomExternalParams)[entry];
-        if (numValues > 0)
-            packed.x = (float) values[0];
-        if (numValues > 1)
-            packed.y = (float) values[1];
-        if (numValues > 2)
-            packed.z = (float) values[2];
-        if (numValues > 3)
-            packed.w = (float) values[3];
-    }
-    gpu->psCustomExternalID->Upload();
-    gpu->psCustomExternalParams->Upload();
-    // Create the Expressions: the variable list is "x", "y", "z" followed by the per-atom parameter names.
-    const char* const coordinates[] = {"x", "y", "z"};
-    vector<string> variables(coordinates, coordinates+3);
-    variables.insert(variables.end(), paramNames.begin(), paramNames.end());
-    SetCustomExternalEnergyExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-    SetCustomExternalForceExpressions(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("x").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize),
-                                  createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("y").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize),
-                                  createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp).differentiate("z").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-}
-// Configures a custom nonbonded force: records the cutoff and method, installs the exclusions,
-// uploads up to four per-atom parameters, uploads the range data of every tabulated function
-// registered earlier through gpuSetTabulatedFunction(), and builds the energy expression and the
-// force expression (energyExp differentiated with respect to "r").
-// Throws OpenMMException when the cutoff differs from a non-zero cutoff already stored in the
-// context, for more than four per-atom parameters, or for more than eight global parameters.
-extern "C"
-void gpuSetCustomNonbondedParameters(gpuContext gpu, const vector<vector<double> >& parameters, const vector<vector<int> >& exclusions,
-            CudaNonbondedMethod method, float cutoffDistance, const string& energyExp,
-            const vector<string>& paramNames, const vector<string>& globalParamNames)
-{
-    if (gpu->sim.nonbondedCutoff != 0.0f && gpu->sim.nonbondedCutoff != cutoffDistance)
-        throw OpenMMException("All nonbonded forces must use the same cutoff");
-    if (paramNames.size() > 4)
-        throw OpenMMException("CudaPlatform only supports four per-atom parameters for custom nonbonded forces");
-    if (globalParamNames.size() > 8)
-        throw OpenMMException("CudaPlatform only supports eight global parameters for custom nonbonded forces");
-    gpu->sim.nonbondedCutoff = cutoffDistance;
-    gpu->sim.nonbondedCutoffSqr = cutoffDistance*cutoffDistance;
-    gpu->sim.customNonbondedMethod = method;
-    gpu->sim.customParameters = paramNames.size();
-    setExclusions(gpu, exclusions);
-    gpu->psCustomParams = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "CustomParams");
-    gpu->sim.pCustomParams = gpu->psCustomParams->_pDevData;
-    // Only the components that have a matching parameter are written.
-    for (int i = 0; i < (int) parameters.size(); i++) {
-        if (parameters[i].size() > 0)
-            (*gpu->psCustomParams)[i].x = (float) parameters[i][0];
-        if (parameters[i].size() > 1)
-            (*gpu->psCustomParams)[i].y = (float) parameters[i][1];
-        if (parameters[i].size() > 2)
-            (*gpu->psCustomParams)[i].z = (float) parameters[i][2];
-        if (parameters[i].size() > 3)
-            (*gpu->psCustomParams)[i].w = (float) parameters[i][3];
-    }
-    gpu->psCustomParams->Upload();
-    // This class serves as a placeholder for custom functions in expressions.
-    class FunctionPlaceholder : public Lepton::CustomFunction {
-    public:
-        int getNumArguments() const {
-            return 1;
-        }
-        double evaluate(const double* arguments) const {
-            return 0.0;
-        }
-        double evaluateDerivative(const double* arguments, const int* derivOrder) const {
-            return 0.0;
-        }
-        CustomFunction* clone() const {
-            return new FunctionPlaceholder();
-        }
-    };
-    // Record the tabulated functions, which were previously set with calls to gpuSetTabulatedFunction().
-    // One placeholder is shared by every function name.  It is a stack object so that it is released
-    // even when parsing or expression creation below throws; the previous new/delete pair leaked it
-    // on that path.
-    FunctionPlaceholder placeholder;
-    map<string, Lepton::CustomFunction*> functions;
-    gpu->psTabulatedFunctionParams = new CUDAStream<float4>(MAX_TABULATED_FUNCTIONS, 1, "TabulatedFunctionRange");
-    gpu->sim.pTabulatedFunctionParams = gpu->psTabulatedFunctionParams->_pDevData;
-    for (int i = 0; i < MAX_TABULATED_FUNCTIONS; i++) {
-        gpuTabulatedFunction& func = gpu->tabulatedFunctions[i];
-        if (func.coefficients != NULL) {
-            // (min, max, intervals per unit of the argument, index of the last interval)
-            (*gpu->psTabulatedFunctionParams)[i] = make_float4((float) func.min, (float) func.max, (float) (func.coefficients->_length/(func.max-func.min)), (float) (func.coefficients->_length-1));
-            functions[func.name] = &placeholder;
-        }
-    }
-    gpu->psTabulatedFunctionParams->Upload();
-    // Create the Expressions.  The variable list holds four slots for atom 1 (parameter names with
-    // the suffix "1", padded with empty names), four slots for atom 2 (suffix "2"), and finally "r".
-    vector<string> variables;
-    for (int j = 1; j < 3; j++) {
-        for (int i = 0; i < (int) paramNames.size(); i++) {
-            stringstream name;
-            name << paramNames[i] << j;
-            variables.push_back(name.str());
-        }
-        for (int i = paramNames.size(); i < 4; i++)
-            variables.push_back("");
-    }
-    variables.push_back("r");
-    SetCustomNonbondedEnergyExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp, functions).optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-    SetCustomNonbondedForceExpression(createExpression<256>(gpu, energyExp, Lepton::Parser::parse(energyExp, functions).differentiate("r").optimize().createProgram(), variables, globalParamNames, gpu->sim.customExpressionStackSize));
-}
-// Builds a 2048-entry table of erfc() sampled at i*(alphaEwald*nonbondedCutoff)/2048 for
-// i = 0..2047, uploads it, and stores the table size and the matching scale factor in gpu->sim.
-// Reads gpu->sim.alphaEwald and gpu->sim.nonbondedCutoff, so both must be set beforehand.
-static void tabulateErfc(gpuContext gpu)
-{
-    const int numEntries = 2048;
-    gpu->sim.tabulatedErfcSize = numEntries;
-    gpu->sim.tabulatedErfcScale = numEntries/(gpu->sim.alphaEwald*gpu->sim.nonbondedCutoff);
-    gpu->psTabulatedErfc = new CUDAStream<float>(numEntries, 1, "TabulatedErfc");
-    gpu->sim.pTabulatedErfc = gpu->psTabulatedErfc->_pDevData;
-    CUDAStream<float>& table = *gpu->psTabulatedErfc;
-    int entry = 0;
-    while (entry < numEntries) {
-        table[entry] = (float) erfc(entry*(gpu->sim.alphaEwald*gpu->sim.nonbondedCutoff)/numEntries);
-        ++entry;
-    }
-    table.Upload();
-}
-// Stores the Ewald parameters in gpu->sim, allocates the cos/sin sum stream with
-// (2*kmaxX-1)*(2*kmaxY-1)*(2*kmaxZ-1) entries, and rebuilds the erfc table.
-extern "C"
-void gpuSetEwaldParameters(gpuContext gpu, float alpha, int kmaxx, int kmaxy, int kmaxz)
-{
-    // Wave-vector limits along each axis.
-    gpu->sim.kmaxX = kmaxx;
-    gpu->sim.kmaxY = kmaxy;
-    gpu->sim.kmaxZ = kmaxz;
-    // Splitting parameter and the factor -1/(4*alpha^2) derived from it.
-    gpu->sim.alphaEwald = alpha;
-    gpu->sim.factorEwald = -1 / (4*alpha*alpha);
-    gpu->psEwaldCosSinSum = new CUDAStream<float2>((gpu->sim.kmaxX*2-1) * (gpu->sim.kmaxY*2-1) * (gpu->sim.kmaxZ*2-1), 1, "EwaldCosSinSum");
-    gpu->sim.pEwaldCosSinSum = gpu->psEwaldCosSinSum->_pDevStream[0];
-    // The table depends on alphaEwald, which was just set above.
-    tabulateErfc(gpu);
-}
-// Configures particle mesh Ewald: stores alpha and the grid/group sizes, creates the 3D
-// complex-to-complex cuFFT plan, allocates the PME work streams, rebuilds the erfc table, and
-// computes and uploads the b-spline moduli for the three grid dimensions.
-// Throws OpenMMException if the cuFFT plan cannot be created.
-extern "C"
-void gpuSetPMEParameters(gpuContext gpu, float alpha, int gridSizeX, int gridSizeY, int gridSizeZ)
-{
-    gpu->sim.alphaEwald         = alpha;
-    int3 gridSize = make_int3(gridSizeX, gridSizeY, gridSizeZ);
-    gpu->sim.pmeGridSize = gridSize;
-    int3 groupSize = make_int3(2, 4, 4);
-    gpu->sim.pmeGroupSize = groupSize;
-    // cuFFT reports failure only through its return code, so check it instead of carrying on
-    // with an unusable plan.
-    if (cufftPlan3d(&gpu->fftplan, gridSize.x, gridSize.y, gridSize.z, CUFFT_C2C) != CUFFT_SUCCESS)
-        throw OpenMMException("Error creating the FFT plan for PME");
-    gpu->psPmeGrid = new CUDAStream<cufftComplex>(gridSize.x*gridSize.y*gridSize.z, 1, "PmeGrid");
-    gpu->sim.pPmeGrid = gpu->psPmeGrid->_pDevData;
-    gpu->psPmeBsplineModuli[0] = new CUDAStream<float>(gridSize.x, 1, "PmeBsplineModuli0");
-    gpu->sim.pPmeBsplineModuli[0] = gpu->psPmeBsplineModuli[0]->_pDevData;
-    gpu->psPmeBsplineModuli[1] = new CUDAStream<float>(gridSize.y, 1, "PmeBsplineModuli1");
-    gpu->sim.pPmeBsplineModuli[1] = gpu->psPmeBsplineModuli[1]->_pDevData;
-    gpu->psPmeBsplineModuli[2] = new CUDAStream<float>(gridSize.z, 1, "PmeBsplineModuli2");
-    gpu->sim.pPmeBsplineModuli[2] = gpu->psPmeBsplineModuli[2]->_pDevData;
-    gpu->psPmeBsplineTheta = new CUDAStream<float4>(PME_ORDER*gpu->natoms, 1, "PmeBsplineTheta");
-    gpu->sim.pPmeBsplineTheta = gpu->psPmeBsplineTheta->_pDevData;
-    gpu->psPmeBsplineDtheta = new CUDAStream<float4>(PME_ORDER*gpu->natoms, 1, "PmeBsplineDtheta");
-    gpu->sim.pPmeBsplineDtheta = gpu->psPmeBsplineDtheta->_pDevData;
-    gpu->psPmeAtomRange = new CUDAStream<int>(gridSize.x*gridSize.y*gridSize.z+1, 1, "PmeAtomRange");
-    gpu->sim.pPmeAtomRange = gpu->psPmeAtomRange->_pDevData;
-    gpu->psPmeAtomGridIndex = new CUDAStream<int2>(gpu->natoms, 1, "PmeAtomGridIndex");
-    gpu->sim.pPmeAtomGridIndex = gpu->psPmeAtomGridIndex->_pDevData;
-    tabulateErfc(gpu);
-    // Initialize the b-spline moduli.
-    int maxSize = max(max(gridSize.x, gridSize.y), gridSize.z);
-    // Entries 1..PME_ORDER of bsplines_data are written below, so the array needs at least
-    // PME_ORDER+1 elements even when every grid dimension is smaller than that.
-    int splineSize = maxSize;
-    if (splineSize < PME_ORDER+1)
-        splineSize = PME_ORDER+1;
-    vector<double> data(PME_ORDER);
-    vector<double> ddata(PME_ORDER);
-    vector<double> bsplines_data(splineSize); // value-initialized, i.e. all zeros
-    data[PME_ORDER-1] = 0.0;
-    data[1] = 0.0;
-    data[0] = 1.0;
-    for (int i = 3; i < PME_ORDER; i++)
-    {
-        double div = 1.0/(i-1.0);
-        data[i-1] = 0.0;
-        for (int j = 1; j < (i-1); j++)
-            data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
-        data[0] = div*data[0];
-    }
-    // Differentiate.
-    ddata[0] = -data[0];
-    for (int i = 1; i < PME_ORDER; i++)
-        ddata[i] = data[i-1]-data[i];
-    double div = 1.0/(PME_ORDER-1);
-    data[PME_ORDER-1] = 0.0;
-    for (int i = 1; i < (PME_ORDER-1); i++)
-        data[PME_ORDER-i-1] = div*(i*data[PME_ORDER-i-2]+(PME_ORDER-i)*data[PME_ORDER-i-1]);
-    data[0] = div*data[0];
-    for (int i = 1; i <= PME_ORDER; i++)
-        bsplines_data[i] = data[i-1];
-    // Evaluate the actual bspline moduli for X/Y/Z.
-    for(int dim = 0; dim < 3; dim++)
-    {
-        int ndata = (dim == 0 ? gridSize.x : dim == 1 ? gridSize.y : gridSize.z);
-        CUDAStream<float>& moduli = *gpu->psPmeBsplineModuli[dim];
-        for (int i = 0; i < ndata; i++)
-        {
-            // Squared magnitude of the discrete Fourier sum of the spline samples at index i.
-            double sc = 0.0;
-            double ss = 0.0;
-            for (int j = 0; j < ndata; j++)
-            {
-                double arg = (2.0*M_PI*i*j)/ndata;
-                sc += bsplines_data[j]*cos(arg);
-                ss += bsplines_data[j]*sin(arg);
-            }
-            moduli[i] = (float) (sc*sc+ss*ss);
-        }
-        // Replace near-zero moduli by the mean of their neighbours.  The neighbour indices wrap
-        // around, so the first and last entries no longer read outside the array.
-        for (int i = 0; i < ndata; i++)
-        {
-            if (moduli[i] < 1.0e-7)
-                moduli[i] = (moduli[(i+ndata-1)%ndata]+moduli[(i+1)%ndata])*0.5f;
-        }
-        moduli.Upload();
-    }
-}
-// Stores the periodic box edge lengths in gpu->sim together with the values derived from them:
-// the inverse of each length, 2*PI divided by each length, and the product of the three lengths.
-extern "C"
-void gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize)
-{
-    // X axis.
-    gpu->sim.periodicBoxSizeX = xsize;
-    gpu->sim.invPeriodicBoxSizeX = 1.0f/xsize;
-    gpu->sim.recipBoxSizeX = 2.0f*PI/gpu->sim.periodicBoxSizeX;
-    // Y axis.
-    gpu->sim.periodicBoxSizeY = ysize;
-    gpu->sim.invPeriodicBoxSizeY = 1.0f/ysize;
-    gpu->sim.recipBoxSizeY = 2.0f*PI/gpu->sim.periodicBoxSizeY;
-    // Z axis.
-    gpu->sim.periodicBoxSizeZ = zsize;
-    gpu->sim.invPeriodicBoxSizeZ = 1.0f/zsize;
-    gpu->sim.recipBoxSizeZ = 2.0f*PI/gpu->sim.periodicBoxSizeZ;
-    // Volume of the box.
-    gpu->sim.cellVolume = gpu->sim.periodicBoxSizeX*gpu->sim.periodicBoxSizeY*gpu->sim.periodicBoxSizeZ;
-}
-// Enables GBSA and uploads the per-atom OBC data: x = radius - dielectricOffset,
-// y = scale * x, plus the atomic charge in the w component of psPosq4.  Entries between
-// radius.size() and the padded atom count receive fixed dummy values.  Finally stores the
-// prefactor computed from the two dielectric constants in gpu->sim.preFactor.
-extern "C"
-void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const vector<float>& radius, const vector<float>& scale, const vector<float>& charge)
-{
-    const unsigned int numAtoms = radius.size();
-    gpu->bIncludeGBSA = true;
-    for (unsigned int index = 0; index < numAtoms; index++)
-    {
-        float2& obc = (*gpu->psObcData)[index];
-        obc.x = radius[index] - dielectricOffset;
-        obc.y = scale[index] * obc.x;
-        (*gpu->psPosq4)[index].w = charge[index];
-#if (DUMP_PARAMETERS == 1)
-        cout << 
-            index << " " << 
-            obc.x << " " <<
-            obc.y;
-#endif
-    }
-    // Dummy out extra atom data
-    unsigned int padded = numAtoms;
-    while (padded < gpu->sim.paddedNumberOfAtoms)
-    {
-        (*gpu->psBornRadii)[padded] = 0.2f;
-        (*gpu->psObcData)[padded].x = 0.01f;
-        (*gpu->psObcData)[padded].y = 0.01f;
-        padded++;
-    }
-    gpu->psBornRadii->Upload();
-    gpu->psObcData->Upload();
-    gpu->psPosq4->Upload();
-    gpu->sim.preFactor = 2.0f*electricConstant*((1.0f/innerDielectric)-(1.0f/solventDielectric))*gpu->sim.forceConversionFactor;
-}
-// Enables GB/VI and uploads the per-atom GB/VI data: x = radius, y = scaled radius,
-// z = tau*gamma with tau = 1/innerDielectric - 1/solventDielectric, and w = 1.  Entries between
-// atom.size() and the padded atom count receive fixed dummy values.  Finally stores the
-// prefactor computed from the two dielectric constants in gpu->sim.preFactor.
-// NOTE: the OBC data stream is filled and uploaded here as well (x = radius, y = 0.9*radius);
-// a disabled diagnostic left by the original author says this "should be removed".
-// This function also redefines DUMP_PARAMETERS to 0 for the remainder of the file.
-extern "C"
-void gpuSetGBVIParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const vector<int>& atom, const vector<float>& radius, 
-                          const vector<float>& gamma, const vector<float>& scaledRadii )
-{
-    const unsigned int numAtoms = atom.size();
-    gpu->bIncludeGBVI = true;
-    const double tau = ((1.0f/innerDielectric)-(1.0f/solventDielectric)); 
-#undef DUMP_PARAMETERS
-#define DUMP_PARAMETERS 0
-    for (unsigned int index = 0; index < numAtoms; index++)
-    {
-        float4& gbvi = (*gpu->psGBVIData)[index];
-        gbvi.x = radius[index];
-        gbvi.y = scaledRadii[index];
-        gbvi.z = (float) (tau*gamma[index]);
-        gbvi.w = 1.0f;
-        float2& obc = (*gpu->psObcData)[index];
-        obc.x = radius[index];
-        obc.y = 0.9f*radius[index];
-#if (DUMP_PARAMETERS == 1)
-        (void) fprintf( stderr,"GBVI param: %5u R=%14.7e scaledR=%14.7e gamma*tau=%14.7e bornRadiusScaleFactor=%14.7e\n",
-                        index, gbvi.x, gbvi.y,
-                        gbvi.z, gbvi.w ); 
-#endif
-    }
-    // Dummy out extra atom data
-    unsigned int padded = numAtoms;
-    while (padded < gpu->sim.paddedNumberOfAtoms)
-    {
-        (*gpu->psBornRadii)[padded] = 0.2f;
-        float4& filler = (*gpu->psGBVIData)[padded];
-        filler.x = 0.01f;
-        filler.y = 0.01f;
-        filler.z = 0.01f;
-        filler.w = 1.00f;
-        padded++;
-    }
-    gpu->psBornRadii->Upload();
-    gpu->psGBVIData->Upload();
-    gpu->psObcData->Upload();
-    gpu->sim.preFactor = 2.0f*electricConstant*((1.0f/innerDielectric)-(1.0f/solventDielectric))*gpu->sim.forceConversionFactor;
-#if (DUMP_PARAMETERS == 1)
-    (void) fprintf( stderr, "gpuSetGBVIParameters: preFactor=%14.6e elecCnstnt=%.4f frcCnvrsnFctr=%.4f tau=%.4f.\n",
-                    gpu->sim.preFactor, 2.0f*electricConstant, gpu->sim.forceConversionFactor, ((1.0f/innerDielectric)-(1.0f/solventDielectric)) );
-#endif
-}
-// Marks `cluster` as invalid and flags its central atom and all of its peripheral atoms in
-// `invalidForShake`.  Whenever a peripheral atom is itself the key of a still-valid cluster in
-// `allClusters`, that cluster is invalidated recursively in the same way.
-static void markShakeClusterInvalid(ShakeCluster& cluster, map<int, ShakeCluster>& allClusters, vector<bool>& invalidForShake)
-{
-    cluster.valid = false;
-    invalidForShake[cluster.centralID] = true;
-    for (int slot = 0; slot < cluster.size; slot++) {
-        const int peripheral = cluster.peripheralID[slot];
-        invalidForShake[peripheral] = true;
-        map<int, ShakeCluster>::iterator neighbor = allClusters.find(peripheral);
-        if (neighbor == allClusters.end())
-            continue; // This atom is not the centre of any cluster.
-        if (!neighbor->second.valid)
-            continue; // Already invalidated, so there is nothing left to propagate.
-        markShakeClusterInvalid(neighbor->second, allClusters, invalidForShake);
-    }
-}
-extern "C"
-void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const vector<int>& atom2, const vector<float>& distance,
-        const vector<float>& invMass1, const vector<float>& invMass2, float constraintTolerance)
-{
-    // Create a vector for recording which atoms are handled by SHAKE (or SETTLE).
-    vector<bool> isShakeAtom(gpu->natoms, false);
-    // Find how many constraints each atom is involved in.
-    vector<int> constraintCount(gpu->natoms, 0);
-    for (int i = 0; i < (int)atom1.size(); i++) {
-        constraintCount[atom1[i]]++;
-        constraintCount[atom2[i]]++;
-    }
-    // Identify clusters of three atoms that can be treated with SETTLE.  First, for every
-    // atom that might be part of such a cluster, make a list of the two other atoms it is
-    // connected to.
-    vector<map<int, float> > settleConstraints(gpu->natoms);
-    for (int i = 0; i < (int)atom1.size(); i++) {
-        if (constraintCount[atom1[i]] == 2 && constraintCount[atom2[i]] == 2) {
-            settleConstraints[atom1[i]][atom2[i]] = distance[i];
-            settleConstraints[atom2[i]][atom1[i]] = distance[i];
-        }
-    }
-    // Now remove the ones that don't actually form closed loops of three atoms.
-    vector<int> settleClusters;
-    for (int i = 0; i < (int)settleConstraints.size(); i++) {
-        if (settleConstraints[i].size() == 2) {
-            int partner1 = settleConstraints[i].begin()->first;
-            int partner2 = (++settleConstraints[i].begin())->first;
-            if (settleConstraints[partner1].size() != 2 || settleConstraints[partner2].size() != 2 ||
-                    settleConstraints[partner1].find(partner2) == settleConstraints[partner1].end())
-                settleConstraints[i].clear();
-            else if (i < partner1 && i < partner2)
-                settleClusters.push_back(i);
-        }
-        else
-            settleConstraints[i].clear();
-    }
-    // Record the actual SETTLE clusters.
-    CUDAStream<int4>* psSettleID          = new CUDAStream<int4>((int) settleClusters.size(), 1, "SettleID");
-    gpu->psSettleID                       = psSettleID;
-    gpu->sim.pSettleID                    = psSettleID->_pDevStream[0];
-    CUDAStream<float2>* psSettleParameter = new CUDAStream<float2>((int) settleClusters.size(), 1, "SettleParameter");
-    gpu->psSettleParameter                = psSettleParameter;
-    gpu->sim.pSettleParameter             = psSettleParameter->_pDevStream[0];
-    gpu->sim.settleConstraints            = settleClusters.size();
-      for (int i = 0; i < (int)settleClusters.size(); i++) {
-        int atom1 = settleClusters[i];
-        int atom2 = settleConstraints[atom1].begin()->first;
-        int atom3 = (++settleConstraints[atom1].begin())->first;
-        float dist12 = settleConstraints[atom1].find(atom2)->second;
-        float dist13 = settleConstraints[atom1].find(atom3)->second;
-        float dist23 = settleConstraints[atom2].find(atom3)->second;
-        if (dist12 == dist13) { // atom1 is the central atom
-            (*psSettleID)[i].x = atom1;
-            (*psSettleID)[i].y = atom2;
-            (*psSettleID)[i].z = atom3;
-            (*psSettleParameter)[i].x = dist12;
-            (*psSettleParameter)[i].y = dist23;
-        }
-        else if (dist12 == dist23) { // atom2 is the central atom
-            (*psSettleID)[i].x = atom2;
-            (*psSettleID)[i].y = atom1;
-            (*psSettleID)[i].z = atom3;
-            (*psSettleParameter)[i].x = dist12;
-            (*psSettleParameter)[i].y = dist13;
-        }
-        else if (dist13 == dist23) { // atom3 is the central atom
-            (*psSettleID)[i].x = atom3;
-            (*psSettleID)[i].y = atom1;
-            (*psSettleID)[i].z = atom2;
-            (*psSettleParameter)[i].x = dist13;
-            (*psSettleParameter)[i].y = dist12;
-        }
-        else
-            throw OpenMMException("Two of the three distances constrained with SETTLE must be the same.");
-        isShakeAtom[atom1] = true;
-        isShakeAtom[atom2] = true;
-        isShakeAtom[atom3] = true;
-    }
-    psSettleID->Upload();
-    psSettleParameter->Upload();
-    gpu->sim.settle_threads_per_block     = (gpu->sim.settleConstraints + gpu->sim.blocks - 1) / gpu->sim.blocks;
-    if (gpu->sim.settle_threads_per_block > gpu->sim.max_shake_threads_per_block)
-        gpu->sim.settle_threads_per_block = gpu->sim.max_shake_threads_per_block;
-    if (gpu->sim.settle_threads_per_block < 1)
-        gpu->sim.settle_threads_per_block = 1;
-    // Find clusters consisting of a central atom with up to three peripheral atoms.
-    map<int, ShakeCluster> clusters;
-    vector<bool> invalidForShake(gpu->natoms, false);
-    for (int i = 0; i < (int)atom1.size(); i++) {
-        if (isShakeAtom[atom1[i]])
-            continue; // This is being taken care of with SETTLE.
-        // Determine which is the central atom.
-        bool firstIsCentral;
-        if (constraintCount[atom1[i]] > 1)
-            firstIsCentral = true;
-        else if (constraintCount[atom2[i]] > 1)
-            firstIsCentral = false;
-        else if (atom1[i] < atom2[i])
-            firstIsCentral = true;
-        else
-            firstIsCentral = false;
-        int centralID, peripheralID;
-        float centralInvMass, peripheralInvMass;
-        if (firstIsCentral) {
-            centralID = atom1[i];
-            peripheralID = atom2[i];
-            centralInvMass = invMass1[i];
-            peripheralInvMass = invMass2[i];
-        }
-        else {
-            centralID = atom2[i];
-            peripheralID = atom1[i];
-            centralInvMass = invMass2[i];
-            peripheralInvMass = invMass1[i];
-        }
-        // Add it to the cluster.
-        if (clusters.find(centralID) == clusters.end()) {
-            clusters[centralID] = ShakeCluster(centralID, centralInvMass);
-        }
-        ShakeCluster& cluster = clusters[centralID];
-        cluster.addAtom(peripheralID, distance[i], peripheralInvMass);
-        if (constraintCount[peripheralID] != 1 || invalidForShake[atom1[i]] || invalidForShake[atom2[i]]) {
-            markShakeClusterInvalid(cluster, clusters, invalidForShake);
-            map<int, ShakeCluster>::iterator otherCluster = clusters.find(peripheralID);
-            if (otherCluster != clusters.end() && otherCluster->second.valid)
-                markShakeClusterInvalid(otherCluster->second, clusters, invalidForShake);
-        }
-    }
-    int validShakeClusters = 0;
-    for (map<int, ShakeCluster>::iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
-        ShakeCluster& cluster = iter->second;
-        if (cluster.valid) {
-            cluster.valid = !invalidForShake[cluster.centralID];
-            for (int i = 0; i < cluster.size; i++)
-                if (invalidForShake[cluster.peripheralID[i]])
-                    cluster.valid = false;
-            if (cluster.valid)
-                ++validShakeClusters;
-        }
-    }
-    // Fill in the Cuda streams.
-    CUDAStream<int4>* psShakeID             = new CUDAStream<int4>(validShakeClusters, 1, "ShakeID");
-    gpu->psShakeID                          = psShakeID;
-    gpu->sim.pShakeID                       = psShakeID->_pDevStream[0];
-    CUDAStream<float4>* psShakeParameter    = new CUDAStream<float4>(validShakeClusters, 1, "ShakeParameter");
-    gpu->psShakeParameter                   = psShakeParameter;
-    gpu->sim.pShakeParameter                = psShakeParameter->_pDevStream[0];
-    gpu->sim.ShakeConstraints               = validShakeClusters;
-    int index = 0;
-    for (map<int, ShakeCluster>::const_iterator iter = clusters.begin(); iter != clusters.end(); ++iter) {
-        const ShakeCluster& cluster = iter->second;
-        if (!cluster.valid)
-            continue;
-        (*psShakeID)[index].x = cluster.centralID;
-        (*psShakeID)[index].y = cluster.peripheralID[0];
-        (*psShakeID)[index].z = cluster.size > 1 ? cluster.peripheralID[1] : -1;
-        (*psShakeID)[index].w = cluster.size > 2 ? cluster.peripheralID[2] : -1;
-        (*psShakeParameter)[index].x = cluster.centralInvMass;
-        (*psShakeParameter)[index].y = 0.5f/(cluster.centralInvMass+cluster.peripheralInvMass);
-        (*psShakeParameter)[index].z = cluster.distance*cluster.distance;
-        (*psShakeParameter)[index].w = cluster.peripheralInvMass;
-        isShakeAtom[cluster.centralID] = true;
-        isShakeAtom[cluster.peripheralID[0]] = true;
-        if (cluster.size > 1)
-            isShakeAtom[cluster.peripheralID[1]] = true;
-        if (cluster.size > 2)
-            isShakeAtom[cluster.peripheralID[2]] = true;
-        ++index;
-    }
-    psShakeID->Upload();
-    psShakeParameter->Upload();
-    gpu->sim.shakeTolerance = constraintTolerance;
-    gpu->sim.shake_threads_per_block     = (gpu->sim.ShakeConstraints + gpu->sim.blocks - 1) / gpu->sim.blocks;
-    if (gpu->sim.shake_threads_per_block > gpu->sim.max_shake_threads_per_block)
-        gpu->sim.shake_threads_per_block = gpu->sim.max_shake_threads_per_block;
-    if (gpu->sim.shake_threads_per_block < 1)
-        gpu->sim.shake_threads_per_block = 1;
-    // Find connected constraints for CCMA.
-    vector<int> ccmaConstraints;
-    for (unsigned i = 0; i < atom1.size(); i++)
-        if (!isShakeAtom[atom1[i]])
-            ccmaConstraints.push_back(i);
-    // Record the connections between constraints.
-    int numCCMA = (int) ccmaConstraints.size();
-    vector<vector<int> > atomConstraints(gpu->natoms);
-    for (int i = 0; i < numCCMA; i++) {
-        atomConstraints[atom1[ccmaConstraints[i]]].push_back(i);
-        atomConstraints[atom2[ccmaConstraints[i]]].push_back(i);
-    }
-    vector<vector<int> > linkedConstraints(numCCMA);
-    for (unsigned atom = 0; atom < atomConstraints.size(); atom++) {
-        for (unsigned i = 0; i < atomConstraints[atom].size(); i++)
-            for (unsigned j = 0; j < i; j++) {
-                int c1 = atomConstraints[atom][i];
-                int c2 = atomConstraints[atom][j];
-                linkedConstraints[c1].push_back(c2);
-                linkedConstraints[c2].push_back(c1);
-            }
-    }
-    int maxLinks = 0;
-    for (unsigned i = 0; i < linkedConstraints.size(); i++)
-        maxLinks = max(maxLinks, (int) linkedConstraints[i].size());
-    int maxAtomConstraints = 0;
-    for (unsigned i = 0; i < atomConstraints.size(); i++)
-        maxAtomConstraints = max(maxAtomConstraints, (int) atomConstraints[i].size());
-    // Compute the constraint coupling matrix
-    vector<vector<int> > atomAngles(gpu->natoms);
-    for (int i = 0; i < (int) gpu->sim.bond_angles; i++)
-        atomAngles[(*gpu->psBondAngleID1)[i].y].push_back(i);
-    vector<vector<pair<int, double> > > matrix(numCCMA);
-    if (numCCMA > 0) {
-        for (int j = 0; j < numCCMA; j++) {
-            for (int k = 0; k < numCCMA; k++) {
-                if (j == k) {
-                    matrix[j].push_back(pair<int, double>(j, 1.0));
-                    continue;
-                }
-                double scale;
-                int cj = ccmaConstraints[j];
-                int ck = ccmaConstraints[k];
-                int atomj0 = atom1[cj];
-                int atomj1 = atom2[cj];
-                int atomk0 = atom1[ck];
-                int atomk1 = atom2[ck];
-                int atoma, atomb, atomc;
-                if (atomj0 == atomk0) {
-                    atoma = atomj1;
-                    atomb = atomj0;
-                    atomc = atomk1;
-                    scale = invMass1[cj]/(invMass1[cj]+invMass2[cj]);
-                }
-                else if (atomj1 == atomk1) {
-                    atoma = atomj0;
-                    atomb = atomj1;
-                    atomc = atomk0;
-                    scale = invMass2[cj]/(invMass1[cj]+invMass2[cj]);
-                }
-                else if (atomj0 == atomk1) {
-                    atoma = atomj1;
-                    atomb = atomj0;
-                    atomc = atomk0;
-                    scale = invMass1[cj]/(invMass1[cj]+invMass2[cj]);
-                }
-                else if (atomj1 == atomk0) {
-                    atoma = atomj0;
-                    atomb = atomj1;
-                    atomc = atomk1;
-                    scale = invMass2[cj]/(invMass1[cj]+invMass2[cj]);
-                }
-                else
-                    continue; // These constraints are not connected.
-                // Look for a third constraint forming a triangle with these two.
-                bool foundConstraint = false;
-                for (int other = 0; other < numCCMA; other++) {
-                    if ((atom1[other] == atoma && atom2[other] == atomc) || (atom1[other] == atomc && atom2[other] == atoma)) {
-                        double d1 = distance[cj];
-                        double d2 = distance[ck];
-                        double d3 = distance[other];
-                        matrix[j].push_back(pair<int, double>(k, scale*(d1*d1+d2*d2-d3*d3)/(2.0*d1*d2)));
-                        foundConstraint = true;
-                        break;
-                    }
-                }
-                if (!foundConstraint) {
-                    // We didn't find one, so look for an angle force field term.
-                    const vector<int>& angleCandidates = atomAngles[atomb];
-                    for (vector<int>::const_iterator iter = angleCandidates.begin(); iter != angleCandidates.end(); iter++) {
-                        int4 atoms = (*gpu->psBondAngleID1)[*iter];
-                        if ((atoms.x == atoma && atoms.z == atomc) || (atoms.z == atoma && atoms.x == atomc)) {
-                            double angle = (*gpu->psBondAngleParameter)[*iter].x;
-                            matrix[j].push_back(pair<int, double>(k, scale*cos(angle*PI/180.0)));
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-        // Invert it using QR.
-        vector<int> matrixRowStart;
-        vector<int> matrixColIndex;
-        vector<double> matrixValue;
-        for (int i = 0; i < numCCMA; i++) {
-            matrixRowStart.push_back(matrixValue.size());
-            for (int j = 0; j < (int) matrix[i].size(); j++) {
-                pair<int, double> element = matrix[i][j];
-                matrixColIndex.push_back(element.first);
-                matrixValue.push_back(element.second);
-            }
-        }
-        matrixRowStart.push_back(matrixValue.size());
-        int *qRowStart, *qColIndex, *rRowStart, *rColIndex;
-        double *qValue, *rValue;
-        int result = QUERN_compute_qr(numCCMA, numCCMA, &matrixRowStart[0], &matrixColIndex[0], &matrixValue[0], NULL,
-                &qRowStart, &qColIndex, &qValue, &rRowStart, &rColIndex, &rValue);
-        vector<double> rhs(numCCMA);
-        matrix.clear();
-        matrix.resize(numCCMA);
-        for (int i = 0; i < numCCMA; i++) {
-            // Extract column i of the inverse matrix.
-            for (int j = 0; j < numCCMA; j++)
-                rhs[j] = (i == j ? 1.0 : 0.0);
-            result = QUERN_multiply_with_q_transpose(numCCMA, qRowStart, qColIndex, qValue, &rhs[0]);
-            result = QUERN_solve_with_r(numCCMA, rRowStart, rColIndex, rValue, &rhs[0], &rhs[0]);
-            for (int j = 0; j < numCCMA; j++) {
-                double value = rhs[j]*distance[ccmaConstraints[i]]/distance[ccmaConstraints[j]];
-                if (abs(value) > 0.05)
-                    matrix[j].push_back(pair<int, double>(i, value));
-            }
-        }
-        QUERN_free_result(qRowStart, qColIndex, qValue);
-        QUERN_free_result(rRowStart, rColIndex, rValue);
-    }
-    int maxRowElements = 0;
-    for (unsigned i = 0; i < matrix.size(); i++)
-        maxRowElements = max(maxRowElements, (int) matrix[i].size());
-    maxRowElements++;
-    // Sort the constraints.
-    vector<int> constraintOrder(numCCMA);
-    for (int i = 0; i < numCCMA; ++i)
-        constraintOrder[i] = i;
-    sort(constraintOrder.begin(), constraintOrder.end(), ConstraintOrderer(atom1, atom2));
-    vector<int> inverseOrder(numCCMA);
-    for (int i = 0; i < numCCMA; ++i)
-        inverseOrder[constraintOrder[i]] = i;
-    for (int i = 0; i < (int)matrix.size(); ++i)
-        for (int j = 0; j < (int)matrix[i].size(); ++j)
-            matrix[i][j].first = inverseOrder[matrix[i][j].first];
-    // Fill in the CUDA streams.
-    CUDAStream<int2>* psCcmaAtoms = new CUDAStream<int2>(numCCMA, 1, "CcmaAtoms");
-    gpu->psCcmaAtoms              = psCcmaAtoms;
-    gpu->sim.pCcmaAtoms           = psCcmaAtoms->_pDevData;
-    CUDAStream<float4>* psCcmaDistance = new CUDAStream<float4>(numCCMA, 1, "CcmaDistance");
-    gpu->psCcmaDistance                = psCcmaDistance;
-    gpu->sim.pCcmaDistance             = psCcmaDistance->_pDevData;
-    CUDAStream<int>* psCcmaAtomConstraints = new CUDAStream<int>(gpu->natoms*maxAtomConstraints, 1, "CcmaAtomConstraints");
-    gpu->psCcmaAtomConstraints             = psCcmaAtomConstraints;
-    gpu->sim.pCcmaAtomConstraints          = psCcmaAtomConstraints->_pDevData;
-    CUDAStream<int>* psCcmaNumAtomConstraints = new CUDAStream<int>(gpu->natoms, 1, "CcmaAtomConstraintsIndex");
-    gpu->psCcmaNumAtomConstraints             = psCcmaNumAtomConstraints;
-    gpu->sim.pCcmaNumAtomConstraints          = psCcmaNumAtomConstraints->_pDevData;
-    CUDAStream<float>* psCcmaDelta1 = new CUDAStream<float>(numCCMA, 1, "CcmaDelta1");
-    gpu->psCcmaDelta1             = psCcmaDelta1;
-    gpu->sim.pCcmaDelta1          = psCcmaDelta1->_pDevData;
-    CUDAStream<float>* psCcmaDelta2 = new CUDAStream<float>(numCCMA, 1, "CcmaDelta2");
-    gpu->psCcmaDelta2             = psCcmaDelta2;
-    gpu->sim.pCcmaDelta2          = psCcmaDelta2->_pDevData;
-    CUDAStream<float>* psCcmaReducedMass = new CUDAStream<float>(numCCMA, 1, "CcmaReducedMass");
-    gpu->psCcmaReducedMass             = psCcmaReducedMass;
-    gpu->sim.pCcmaReducedMass          = psCcmaReducedMass->_pDevData;
-    CUDAStream<unsigned int>* psConstraintMatrixColumn = new CUDAStream<unsigned int>(numCCMA*maxRowElements, 1, "ConstraintMatrixColumn");
-    gpu->psConstraintMatrixColumn               = psConstraintMatrixColumn;
-    gpu->sim.pConstraintMatrixColumn            = psConstraintMatrixColumn->_pDevData;
-    CUDAStream<float>* psConstraintMatrixValue = new CUDAStream<float>(numCCMA*maxRowElements, 1, "ConstraintMatrixValue");
-    gpu->psConstraintMatrixValue             = psConstraintMatrixValue;
-    gpu->sim.pConstraintMatrixValue          = psConstraintMatrixValue->_pDevData;
-    cudaHostAlloc((void**) &gpu->ccmaConvergedHostMarker, sizeof(int), cudaHostAllocMapped);
-    cudaHostGetDevicePointer((void**) &gpu->sim.ccmaConvergedDeviceMarker, (void*) gpu->ccmaConvergedHostMarker, 0);
-    cudaEventCreate(&gpu->ccmaEvent);
-    gpu->sim.ccmaConstraints = numCCMA;
-    for (int i = 0; i < numCCMA; i++) {
-        int index = constraintOrder[i];
-        int c = ccmaConstraints[index];
-        (*psCcmaAtoms)[i].x = atom1[c];
-        (*psCcmaAtoms)[i].y = atom2[c];
-        (*psCcmaDistance)[i].w = distance[c];
-        (*psCcmaReducedMass)[i] = 0.5f/(invMass1[c]+invMass2[c]);
-        for (unsigned int j = 0; j < matrix[index].size(); j++) {
-            (*psConstraintMatrixColumn)[i+j*numCCMA] = matrix[index][j].first;
-            (*psConstraintMatrixValue)[i+j*numCCMA] = (float) matrix[index][j].second;
-        }
-        (*psConstraintMatrixColumn)[i+matrix[index].size()*numCCMA] = numCCMA;
-    }
-    for (unsigned int i = 0; i < atomConstraints.size(); i++) {
-        (*psCcmaNumAtomConstraints)[i] = atomConstraints[i].size();
-        for (unsigned int j = 0; j < atomConstraints[i].size(); j++) {
-            bool forward = (atom1[ccmaConstraints[atomConstraints[i][j]]] == i);
-            (*psCcmaAtomConstraints)[i+j*gpu->natoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
-        }
-    }
-    psCcmaAtoms->Upload();
-    psCcmaDistance->Upload();
-    psCcmaReducedMass->Upload();
-    psCcmaAtomConstraints->Upload();
-    psCcmaNumAtomConstraints->Upload();
-    psConstraintMatrixColumn->Upload();
-    psConstraintMatrixValue->Upload();
-    gpu->sim.ccma_threads_per_block = (gpu->sim.ccmaConstraints + gpu->sim.blocks - 1) / gpu->sim.blocks;
-    if (gpu->sim.ccma_threads_per_block > gpu->sim.threads_per_block)
-        gpu->sim.ccma_threads_per_block = gpu->sim.threads_per_block;
-    if (gpu->sim.ccma_threads_per_block < gpu->sim.blocks)
-        gpu->sim.ccma_threads_per_block = gpu->sim.blocks;
-}
-extern "C"
-int gpuAllocateInitialBuffers(gpuContext gpu)
-{
-    // Allocates the per-atom, step-size, Langevin, random-number and linear-momentum
-    // streams that exist for every simulation, and records their device pointers in
-    // gpu->sim. Reads gpu->natoms, gpu->sim.blocks and gpu->sim.random_threads_per_block,
-    // so those must already be set. Always returns 1.
-    //
-    // Atom counts. The padded count is natoms rounded up via GRID/GRIDBITS
-    // (presumably GRID == 1 << GRIDBITS -- confirm against the header).
-    gpu->sim.atoms = gpu->natoms;
-    gpu->sim.paddedNumberOfAtoms = ((gpu->sim.atoms + GRID - 1) >> GRIDBITS) << GRIDBITS;
-    gpu->sim.degreesOfFreedom = 3 * gpu->sim.atoms - 6;
-    gpu->gpAtomTable = NULL;
-    gpu->gAtomTypes = 0;
-    // Positions/charges; the stride of this stream is reused for the stride multiples.
-    gpu->psPosq4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "Posq");
-    gpu->sim.pPosq = gpu->psPosq4->_pDevStream[0];
-    gpu->sim.stride = gpu->psPosq4->_stride;
-    gpu->sim.stride2 = 2 * gpu->sim.stride;
-    gpu->sim.stride3 = 3 * gpu->sim.stride;
-    gpu->sim.stride4 = 4 * gpu->sim.stride;
-    // Remaining per-atom streams, each sized to the padded atom count.
-    gpu->psPosqP4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "PosqP");
-    gpu->sim.pPosqP = gpu->psPosqP4->_pDevStream[0];
-    gpu->psOldPosq4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "OldPosq");
-    gpu->sim.pOldPosq = gpu->psOldPosq4->_pDevStream[0];
-    gpu->psVelm4 = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "Velm");
-    gpu->sim.pVelm4 = gpu->psVelm4->_pDevStream[0];
-    gpu->psBornRadii = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "BornRadii");
-    gpu->sim.pBornRadii = gpu->psBornRadii->_pDevStream[0];
-    gpu->psObcChain = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, 1, "ObcChain");
-    gpu->sim.pObcChain = gpu->psObcChain->_pDevStream[0];
-    gpu->psSigEps2 = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "SigEps2");
-    gpu->sim.pAttr = gpu->psSigEps2->_pDevStream[0];
-    gpu->psObcData = new CUDAStream<float2>(gpu->sim.paddedNumberOfAtoms, 1, "ObcData");
-    gpu->sim.pObcData = gpu->psObcData->_pDevStream[0];
-    gpu->psGBVIData = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, 1, "GBVIData");
-    gpu->sim.pGBVIData = gpu->psGBVIData->_pDevStream[0];
-    // Single-element step-size stream, zeroed and uploaded.
-    gpu->psStepSize = new CUDAStream<float2>(1, 1, "StepSize");
-    gpu->sim.pStepSize = gpu->psStepSize->_pDevStream[0];
-    (*gpu->psStepSize)[0] = make_float2(0.0f, 0.0f);
-    gpu->psStepSize->Upload();
-    // Three Langevin coefficients; the values are filled in elsewhere.
-    gpu->psLangevinParameters = new CUDAStream<float>(3, 1, "LangevinParameters");
-    gpu->sim.pLangevinParameters = gpu->psLangevinParameters->_pDevStream[0];
-    gpu->pAtomSymbol = new unsigned char[gpu->natoms];
-    // Atom index table starts out as the identity mapping.
-    gpu->psAtomIndex = new CUDAStream<int>(gpu->sim.paddedNumberOfAtoms, 1, "AtomIndex");
-    gpu->sim.pAtomIndex = gpu->psAtomIndex->_pDevStream[0];
-    for (int atom = 0; atom < (int) gpu->sim.paddedNumberOfAtoms; ++atom)
-        (*gpu->psAtomIndex)[atom] = atom;
-    gpu->psAtomIndex->Upload();
-    gpu->posCellOffsets.resize(gpu->natoms, make_int3(0, 0, 0));
-    gpu->sim.outputBuffers = 0;
-    // Random number storage: randomFrames frames of per-atom values plus one extra frame.
-    gpu->seed = 1;
-    gpu->sim.randomFrames = 20;
-    gpu->sim.randomIterations = gpu->sim.randomFrames;
-    gpu->sim.randoms = gpu->sim.randomFrames * gpu->sim.paddedNumberOfAtoms;
-    gpu->sim.totalRandoms = gpu->sim.randoms + gpu->sim.paddedNumberOfAtoms;
-    gpu->psRandom4 = new CUDAStream<float4>(gpu->sim.totalRandoms, 1, "Random4");
-    gpu->psRandom2 = new CUDAStream<float2>(gpu->sim.totalRandoms, 1, "Random2");
-    gpu->psRandomPosition = new CUDAStream<int>(gpu->sim.blocks, 1, "RandomPosition");
-    gpu->psRandomSeed = new CUDAStream<uint4>(gpu->sim.blocks * gpu->sim.random_threads_per_block, 1, "RandomSeed");
-    gpu->sim.pRandom4 = gpu->psRandom4->_pDevStream[0];
-    gpu->sim.pRandom2 = gpu->psRandom2->_pDevStream[0];
-    gpu->sim.pRandomPosition = gpu->psRandomPosition->_pDevStream[0];
-    gpu->sim.pRandomSeed = gpu->psRandomSeed->_pDevStream[0];
-    // One linear-momentum slot per block, cleared to zero and uploaded.
-    gpu->psLinearMomentum = new CUDAStream<float4>(gpu->sim.blocks, 1, "LinearMomentum");
-    gpu->sim.pLinearMomentum = gpu->psLinearMomentum->_pDevStream[0];
-    for (int block = 0; block < (int) gpu->sim.blocks; ++block)
-        (*gpu->psLinearMomentum)[block] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-    gpu->psLinearMomentum->Upload();
-    return 1;
-}
-extern "C"
-void gpuSetPositions(gpuContext gpu, const vector<float>& x, const vector<float>& y, const vector<float>& z)
-{
-    // Copies the first gpu->natoms coordinates into the x/y/z components of the Posq
-    // stream (the w component is left untouched) and uploads the stream.
-    // x, y and z are indexed up to natoms-1 without a size check.
-    for (int atom = 0; atom < gpu->natoms; ++atom)
-    {
-        float4& posq = (*gpu->psPosq4)[atom];
-        posq.x = x[atom];
-        posq.y = y[atom];
-        posq.z = z[atom];
-    }
-    gpu->psPosq4->Upload();
-    // Positions changed, so flag the Born radii for recalculation.
-    gpu->bRecalculateBornRadii = true;
-}
-extern "C"
-void gpuSetVelocities(gpuContext gpu, const vector<float>& x, const vector<float>& y, const vector<float>& z)
-{
-    // Copies the first gpu->natoms velocity components into the x/y/z components of
-    // the Velm stream (the w component is left untouched) and uploads the stream.
-    // x, y and z are indexed up to natoms-1 without a size check.
-    for (int atom = 0; atom < gpu->natoms; ++atom)
-    {
-        float4& velm = (*gpu->psVelm4)[atom];
-        velm.x = x[atom];
-        velm.y = y[atom];
-        velm.z = z[atom];
-    }
-    gpu->psVelm4->Upload();
-}
-extern "C"
-void gpuSetMass(gpuContext gpu, const vector<float>& mass)
-{
-    // Stores 1/mass for each of the first gpu->natoms atoms in the w component of the
-    // Velm stream, records the reciprocal of the summed mass in
-    // gpu->sim.inverseTotalMass, and uploads the stream.
-    // mass is indexed up to natoms-1 without a size check; a zero mass is not guarded.
-    float massSum = 0.0f;
-    for (int atom = 0; atom < gpu->natoms; ++atom)
-    {
-        const float atomMass = mass[atom];
-        (*gpu->psVelm4)[atom].w = 1.0f/atomMass;
-        massSum += atomMass;
-    }
-    gpu->sim.inverseTotalMass = 1.0f / massSum;
-    gpu->psVelm4->Upload();
-}
-extern "C"
-void gpuInitializeRandoms(gpuContext gpu)
-{
-    // Resets every block's random-position entry to zero, fills the per-thread seed
-    // stream from a host generator derived from gpu->seed, uploads both streams, then
-    // calls gpuSetConstants and kGenerateRandoms.
-    for (int block = 0; block < (int) gpu->sim.blocks; ++block)
-        (*gpu->psRandomPosition)[block] = 0;
-    // Combine the seed with its shifted bitwise complement to form the host generator seed.
-    int seed = gpu->seed | ((gpu->seed ^ 0xffffffff) << 16);
-#if 0
-    // Disabled alternative that draws the seeds from the C library generator.
-    srand(seed);
-    for (int thread = 0; thread < (int) (gpu->sim.blocks * gpu->sim.random_threads_per_block); ++thread)
-    {
-        uint4& state = (*gpu->psRandomSeed)[thread];
-        state.x = rand();
-        state.y = rand();
-        state.z = rand();
-        state.w = rand();
-    }
-#else
-    RNG rng(seed);
-    for (int thread = 0; thread < (int) (gpu->sim.blocks * gpu->sim.random_threads_per_block); ++thread)
-    {
-        uint4& state = (*gpu->psRandomSeed)[thread];
-        state.x = rng.rand_int();
-        state.y = rng.rand_int();
-        state.z = rng.rand_int();
-        state.w = rng.rand_int();
-    }
-#endif
-    gpu->psRandomPosition->Upload();
-    gpu->psRandomSeed->Upload();
-    gpuSetConstants(gpu);
-    kGenerateRandoms(gpu);
-}
-extern "C"
-bool gpuIsAvailable()
-{
-    // Reports whether the CUDA runtime can see at least one device.
-    // Returns false when the device count query itself fails, instead of
-    // reading a count that was never written.
-    int deviceCount = 0;
-    cudaError_t status = cudaGetDeviceCount(&deviceCount);
-    if (status != cudaSuccess)
-        return false;
-    return (deviceCount > 0);
-}
-extern "C"
-void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
-{
-    // Creates a gpuContext for numAtoms atoms on the requested CUDA device, chooses the
-    // kernel launch configuration from the device properties, allocates the initial
-    // buffers, seeds the random number streams and sets default constants.
-    //   numAtoms        - number of atoms in the system (stored in gpu->natoms)
-    //   device          - index of the CUDA device to use; if it cannot be selected,
-    //                     the device that is current afterwards is used instead
-    //   useBlockingSync - selects cudaDeviceBlockingSync instead of cudaDeviceScheduleAuto
-    // Returns the new context as a void*.
-    gpuContext gpu = new _gpuContext;
-    // Select which device to use
-    int currentDevice;
-    cudaError_t status = cudaGetDevice(&currentDevice);
-    RTERROR(status, "Error getting CUDA device")
-    if ((int) device != currentDevice)
-        cudaSetDevice(device); // Ignore errors
-    // Record the device that is actually active, whether or not the switch succeeded.
-    status = cudaGetDevice(&gpu->device);
-    RTERROR(status, "Error getting CUDA device")
-    status = cudaSetDeviceFlags(cudaDeviceMapHost+(useBlockingSync ? cudaDeviceBlockingSync : cudaDeviceScheduleAuto));
-    RTERROR(status, "Error setting device flags")
-    gpu->useBlockingSync = useBlockingSync;
-    // Determine kernel call configuration.
-    // Query the device in use (gpu->device), not the one that was current on entry.
-    cudaDeviceProp deviceProp;
-    status = cudaGetDeviceProperties(&deviceProp, gpu->device);
-    RTERROR(status, "Error getting CUDA device properties")
-    // Determine SM version
-    if (deviceProp.major == 1)
-    {
-        switch (deviceProp.minor)
-        {
-        case 0:
-        case 1:
-            gpu->sm_version = SM_10;
-            gpu->sim.workUnitsPerSM = G8X_NONBOND_WORKUNITS_PER_SM;
-            break;
-        default:
-            gpu->sm_version = SM_12;
-            gpu->sim.workUnitsPerSM = GT2XX_NONBOND_WORKUNITS_PER_SM;
-            break;
-        }
-    }
-    else
-    {
-        gpu->sm_version = SM_20;
-        gpu->sim.workUnitsPerSM = GF1XX_NONBOND_WORKUNITS_PER_SM;
-    }
-    // Pick per-kernel thread counts from the register file size of the device.
-    if (deviceProp.regsPerBlock == 8192)
-    {
-        gpu->sim.nonbond_threads_per_block          = G8X_NONBOND_THREADS_PER_BLOCK;
-        gpu->sim.bornForce2_threads_per_block       = G8X_BORNFORCE2_THREADS_PER_BLOCK;
-        gpu->sim.max_shake_threads_per_block        = G8X_SHAKE_THREADS_PER_BLOCK;
-        gpu->sim.max_update_threads_per_block       = G8X_UPDATE_THREADS_PER_BLOCK;
-        gpu->sim.max_localForces_threads_per_block  = G8X_LOCALFORCES_THREADS_PER_BLOCK;
-        gpu->sim.threads_per_block                  = G8X_THREADS_PER_BLOCK;
-        gpu->sim.random_threads_per_block           = G8X_RANDOM_THREADS_PER_BLOCK;
-        gpu->blocksPerSM                            = G8X_BLOCKS_PER_SM;
-    }
-    else if (deviceProp.regsPerBlock <= 16384)
-    {
-        gpu->sim.nonbond_threads_per_block          = GT2XX_NONBOND_THREADS_PER_BLOCK;
-        gpu->sim.bornForce2_threads_per_block       = GT2XX_BORNFORCE2_THREADS_PER_BLOCK;
-        gpu->sim.max_shake_threads_per_block        = GT2XX_SHAKE_THREADS_PER_BLOCK;
-        gpu->sim.max_update_threads_per_block       = GT2XX_UPDATE_THREADS_PER_BLOCK;
-        gpu->sim.max_localForces_threads_per_block  = GT2XX_LOCALFORCES_THREADS_PER_BLOCK;
-        gpu->sim.threads_per_block                  = GT2XX_THREADS_PER_BLOCK;
-        gpu->sim.random_threads_per_block           = GT2XX_RANDOM_THREADS_PER_BLOCK;
-        gpu->blocksPerSM                            = GT2XX_BLOCKS_PER_SM;
-    }
-    else
-    {
-        gpu->sim.nonbond_threads_per_block          = GF1XX_NONBOND_THREADS_PER_BLOCK;
-        gpu->sim.bornForce2_threads_per_block       = GF1XX_BORNFORCE2_THREADS_PER_BLOCK;
-        gpu->sim.max_shake_threads_per_block        = GF1XX_SHAKE_THREADS_PER_BLOCK;
-        gpu->sim.max_update_threads_per_block       = GF1XX_UPDATE_THREADS_PER_BLOCK;
-        gpu->sim.max_localForces_threads_per_block  = GF1XX_LOCALFORCES_THREADS_PER_BLOCK;
-        gpu->sim.threads_per_block                  = GF1XX_THREADS_PER_BLOCK;
-        gpu->sim.random_threads_per_block           = GF1XX_RANDOM_THREADS_PER_BLOCK;
-        gpu->blocksPerSM                            = GF1XX_BLOCKS_PER_SM;
-    }
-    gpu->sim.nonbond_blocks = deviceProp.multiProcessorCount*gpu->blocksPerSM;
-    gpu->sim.bornForce2_blocks = deviceProp.multiProcessorCount*gpu->blocksPerSM;
-    gpu->sim.blocks = deviceProp.multiProcessorCount;
-    gpu->sharedMemoryPerBlock = deviceProp.sharedMemPerBlock;
-    gpu->sim.shake_threads_per_block                = gpu->sim.max_shake_threads_per_block;
-    gpu->sim.localForces_threads_per_block          = gpu->sim.max_localForces_threads_per_block;
-    // The block and thread counts above must be in place before the buffers are allocated.
-    gpu->natoms = numAtoms;
-    gpuAllocateInitialBuffers(gpu);
-    gpu->iterations = 0;
-    // Spread the atoms over the blocks, clamped to the device maximum and to at least
-    // the length of the Langevin parameter stream.
-    gpu->sim.update_threads_per_block               = (gpu->natoms + gpu->sim.blocks - 1) / gpu->sim.blocks;
-    if (gpu->sim.update_threads_per_block > gpu->sim.max_update_threads_per_block)
-        gpu->sim.update_threads_per_block = gpu->sim.max_update_threads_per_block;
-    if (gpu->sim.update_threads_per_block < gpu->psLangevinParameters->_length)
-        gpu->sim.update_threads_per_block = gpu->psLangevinParameters->_length;
-    gpu->sim.bf_reduce_threads_per_block = gpu->sim.update_threads_per_block;
-    // Round the reduction thread count up to a multiple of GRID, then clamp it.
-    gpu->sim.bsf_reduce_threads_per_block = (gpu->sim.stride4 + gpu->natoms + gpu->sim.blocks - 1) / gpu->sim.blocks;
-    gpu->sim.bsf_reduce_threads_per_block = ((gpu->sim.bsf_reduce_threads_per_block + (GRID - 1)) / GRID) * GRID;
-    if (gpu->sim.bsf_reduce_threads_per_block > gpu->sim.threads_per_block)
-        gpu->sim.bsf_reduce_threads_per_block = gpu->sim.threads_per_block;
-    if (gpu->sim.bsf_reduce_threads_per_block < 1)
-        gpu->sim.bsf_reduce_threads_per_block = 1;
-    // Initialize constants to reasonable values
-    gpu->sim.probeRadius            = probeRadius;
-    gpu->sim.surfaceAreaFactor      = surfaceAreaFactor;
-    gpu->sim.electricConstant       = electricConstant;
-    gpu->sim.nonbondedMethod        = NO_CUTOFF;
-    gpu->sim.nonbondedCutoff        = 0.0f;
-    gpu->sim.nonbondedCutoffSqr     = 0.0f;
-    gpu->sim.bigFloat               = 99999999.0f;
-    gpu->sim.forceConversionFactor  = forceConversionFactor;
-    gpu->sim.preFactor              = 2.0f*electricConstant*((1.0f/defaultInnerDielectric)-(1.0f/defaultSolventDielectric))*gpu->sim.forceConversionFactor;
-    gpu->sim.dielectricOffset       = dielectricOffset;
-    gpu->sim.alphaOBC               = alphaOBC;
-    gpu->sim.betaOBC                = betaOBC;
-    gpu->sim.gammaOBC               = gammaOBC;
-    gpu->sim.maxShakeIterations     = 15;
-    gpu->sim.shakeTolerance         = 1.0e-04f * 2.0f;
-    gpu->sim.InvMassJ               = 9.920635e-001f;
-    gpu->grid                       = GRID;
-    gpu->bCalculateCM               = false;
-    gpu->bRemoveCM                  = false;
-    gpu->bRecalculateBornRadii      = true;
-    gpu->bIncludeGBSA               = false;
-    gpu->bIncludeGBVI               = false;
-    gpuInitializeRandoms(gpu);
-    // To be determined later
-    gpu->psForce4                   = NULL;
-    gpu->psEnergy                   = NULL;
-    gpu->sim.pForce4                = NULL;
-    gpu->psBornForce                = NULL;
-    gpu->sim.pBornForce             = NULL;
-    gpu->psBornSum                  = NULL;
-    gpu->sim.pBornSum               = NULL;
-    gpu->psBondID                   = NULL;
-    gpu->psBondParameter            = NULL;
-    gpu->psBondAngleID1             = NULL;
-    gpu->psBondAngleID2             = NULL;
-    gpu->psBondAngleParameter       = NULL;
-    gpu->psDihedralID1              = NULL;
-    gpu->psDihedralID2              = NULL;
-    gpu->psDihedralParameter        = NULL;
-    gpu->psRbDihedralID1            = NULL;
-    gpu->psRbDihedralID2            = NULL;
-    gpu->psRbDihedralParameter1     = NULL;
-    gpu->psRbDihedralParameter2     = NULL;
-    gpu->psLJ14ID                   = NULL;
-    gpu->psLJ14Parameter            = NULL;
-    gpu->psCustomParams             = NULL;
-    gpu->psCustomBondID             = NULL;
-    gpu->psCustomBondParams         = NULL;
-    gpu->psCustomAngleID1           = NULL;
-    gpu->psCustomAngleID2           = NULL;
-    gpu->psCustomAngleParams        = NULL;
-    gpu->psCustomTorsionID1         = NULL;
-    gpu->psCustomTorsionID2         = NULL;
-    gpu->psCustomTorsionParams      = NULL;
-    gpu->psCustomExternalID         = NULL;
-    gpu->psCustomExternalParams     = NULL;
-    gpu->psEwaldCosSinSum           = NULL;
-    gpu->psTabulatedErfc            = NULL;
-    gpu->psPmeGrid                  = NULL;
-    gpu->psPmeBsplineModuli[0]      = NULL;
-    gpu->psPmeBsplineModuli[1]      = NULL;
-    gpu->psPmeBsplineModuli[2]      = NULL;
-    gpu->psPmeBsplineTheta          = NULL;
-    gpu->psPmeBsplineDtheta         = NULL;
-    gpu->psPmeAtomRange             = NULL;
-    gpu->psPmeAtomGridIndex         = NULL;
-    gpu->psShakeID                  = NULL;
-    gpu->psShakeParameter           = NULL;
-    gpu->psSettleID                 = NULL;
-    gpu->psSettleParameter          = NULL;
-    gpu->psExclusion                = NULL;
-    gpu->psExclusionIndex           = NULL;
-    gpu->psWorkUnit                 = NULL;
-    gpu->psInteractingWorkUnit      = NULL;
-    gpu->psInteractionFlag          = NULL;
-    gpu->psInteractionCount         = NULL;
-    gpu->psGridBoundingBox          = NULL;
-    gpu->psGridCenter               = NULL;
-    gpu->psCcmaAtoms                = NULL;
-    gpu->psCcmaDistance             = NULL;
-    gpu->psCcmaAtomConstraints      = NULL;
-    gpu->psCcmaNumAtomConstraints   = NULL;
-    gpu->psCcmaDelta1               = NULL;
-    gpu->psCcmaDelta2               = NULL;
-    gpu->psCcmaReducedMass          = NULL;
-    gpu->psConstraintMatrixColumn   = NULL;
-    gpu->psConstraintMatrixValue    = NULL;
-    gpu->psTabulatedFunctionParams  = NULL;
-    for (int i = 0; i < MAX_TABULATED_FUNCTIONS; i++)
-        gpu->tabulatedFunctions[i].coefficients = NULL;
-    gpu->sim.customExpressionStackSize = 0;
-    gpu->sim.customBonds = 0;
-    gpu->sim.customAngles = 0;
-    gpu->sim.customTorsions = 0;
-    // Initialize output buffer before reading parameters
-    gpu->pOutputBufferCounter       = new unsigned int[gpu->sim.paddedNumberOfAtoms];
-    memset(gpu->pOutputBufferCounter, 0, gpu->sim.paddedNumberOfAtoms * sizeof(unsigned int));
-    return (void*)gpu;
-}
-// Configure the context for Langevin dynamics.
-//
-// Copies tau, deltaT, temperature and errorTol into gpu->sim, sets kT to
-// BOLTZ * T, stores the three derived Langevin coefficients in
-// psLangevinParameters and refreshes the step-size stream.
-extern "C"
-void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol) {
-    // Plain copies of the caller's settings.
-    gpu->sim.tau                    = tau;
-    gpu->sim.errorTol               = errorTol;
-    gpu->sim.deltaT                 = deltaT;
-    gpu->sim.oneOverDeltaT          = 1.0f/deltaT;
-    gpu->sim.T                      = temperature;
-    gpu->sim.kT                     = BOLTZ * gpu->sim.T;
-    // The coefficients are held in doubles here and narrowed to float when stored.
-    const double velocityScale = exp(-deltaT/tau);
-    const double forceScale    = (1-velocityScale)*tau;
-    const double noiseScale    = sqrt(2*gpu->sim.kT/tau)*sqrt(0.5*(1-velocityScale*velocityScale)*tau);
-    (*gpu->psLangevinParameters)[0] = (float) velocityScale;
-    (*gpu->psLangevinParameters)[1] = (float) forceScale;
-    (*gpu->psLangevinParameters)[2] = (float) noiseScale;
-    gpu->psLangevinParameters->Upload();
-    // Step size entry 0: .x is only seeded while it is still zero, .y is always overwritten.
-    gpu->psStepSize->Download();
-    if ((*gpu->psStepSize)[0].x == 0)
-        (*gpu->psStepSize)[0].x = deltaT;
-    (*gpu->psStepSize)[0].y = deltaT;
-    gpu->psStepSize->Upload();
-}
-// Configure the context for Verlet dynamics: records deltaT (and its
-// reciprocal) and errorTol in gpu->sim, then refreshes the step-size stream.
-extern "C"
-void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol) {
-    gpu->sim.errorTol               = errorTol;
-    gpu->sim.deltaT                 = deltaT;
-    gpu->sim.oneOverDeltaT          = 1.0f/deltaT;
-    // Step size entry 0: .x is only seeded while it is still zero, .y is always overwritten.
-    gpu->psStepSize->Download();
-    if ((*gpu->psStepSize)[0].x == 0)
-        (*gpu->psStepSize)[0].x = deltaT;
-    (*gpu->psStepSize)[0].y = deltaT;
-    gpu->psStepSize->Upload();
-}
-// Configure the context for Brownian dynamics.
-//
-// Copies tau, deltaT and temperature into gpu->sim, derives tauDeltaT, kT and
-// the noise amplitude sqrt(2*kT*deltaT*tau), then refreshes the step-size
-// stream.
-extern "C"
-void gpuSetBrownianIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature) {
-    // Caller-supplied settings.
-    gpu->sim.tau                    = tau;
-    gpu->sim.deltaT                 = deltaT;
-    gpu->sim.oneOverDeltaT          = 1.0f/deltaT;
-    gpu->sim.T                      = temperature;
-    // Quantities derived from the values stored above.
-    gpu->sim.tauDeltaT              = gpu->sim.deltaT * gpu->sim.tau;
-    gpu->sim.kT                     = BOLTZ * gpu->sim.T;
-    gpu->sim.noiseAmplitude         = sqrt(2.0f*gpu->sim.kT*deltaT*tau);
-    // Step size entry 0: .x is only seeded while it is still zero, .y is always overwritten.
-    gpu->psStepSize->Download();
-    if ((*gpu->psStepSize)[0].x == 0)
-        (*gpu->psStepSize)[0].x = deltaT;
-    (*gpu->psStepSize)[0].y = deltaT;
-    gpu->psStepSize->Upload();
-}
-// Record the Andersen thermostat settings in gpu->sim: the collision
-// frequency, the temperature T, and kT = BOLTZ * T.
-extern "C"
-void gpuSetAndersenThermostatParameters(gpuContext gpu, float temperature, float collisionFrequency) {
-    gpu->sim.collisionFrequency     = collisionFrequency;
-    gpu->sim.T                      = temperature;
-    gpu->sim.kT                     = BOLTZ * gpu->sim.T;
-}
-// Tear down a context: frees the host arrays and every stream object the
-// context owns, destroys the FFT plan, CCMA event and compaction plan, deletes
-// the context itself and finally calls cudaThreadExit().
-extern "C"
-void gpuShutDown(gpuContext gpu)
-{
-    // Host-side arrays.
-    delete[] gpu->pOutputBufferCounter;
-    delete[] gpu->gpAtomTable;
-    delete[] gpu->pAtomSymbol;
-
-    // Core per-atom streams.
-    delete gpu->psPosq4;
-    delete gpu->psPosqP4;
-    delete gpu->psOldPosq4;
-    delete gpu->psVelm4;
-    delete gpu->psForce4;
-    delete gpu->psEnergy;
-    delete gpu->psSigEps2;
-
-    // Custom force streams. Deleting a NULL pointer is a no-op, so only the
-    // grouped deletions keep an explicit guard on their parameter stream.
-    delete gpu->psCustomParams;
-    if (gpu->psCustomBondParams != NULL)
-    {
-        delete gpu->psCustomBondID;
-        delete gpu->psCustomBondParams;
-    }
-    if (gpu->psCustomAngleParams != NULL)
-    {
-        delete gpu->psCustomAngleID1;
-        delete gpu->psCustomAngleID2;
-        delete gpu->psCustomAngleParams;
-    }
-    if (gpu->psCustomTorsionParams != NULL)
-    {
-        delete gpu->psCustomTorsionID1;
-        delete gpu->psCustomTorsionID2;
-        delete gpu->psCustomTorsionParams;
-    }
-    if (gpu->psCustomExternalParams != NULL)
-    {
-        delete gpu->psCustomExternalID;
-        delete gpu->psCustomExternalParams;
-    }
-
-    // Ewald / PME. The FFT plan is only destroyed when a PME grid exists.
-    delete gpu->psEwaldCosSinSum;
-    if (gpu->psPmeGrid != NULL)
-    {
-        delete gpu->psPmeGrid;
-        for (int axis = 0; axis < 3; axis++)
-            delete gpu->psPmeBsplineModuli[axis];
-        delete gpu->psPmeBsplineTheta;
-        delete gpu->psPmeBsplineDtheta;
-        delete gpu->psPmeAtomRange;
-        delete gpu->psPmeAtomGridIndex;
-        cufftDestroy(gpu->fftplan);
-    }
-    delete gpu->psTabulatedErfc;
-
-    // Implicit solvent streams.
-    delete gpu->psObcData;
-    delete gpu->psGBVIData;
-    delete gpu->psObcChain;
-    delete gpu->psBornForce;
-    delete gpu->psBornRadii;
-    delete gpu->psBornSum;
-
-    // Bonded interaction streams.
-    delete gpu->psBondID;
-    delete gpu->psBondParameter;
-    delete gpu->psBondAngleID1;
-    delete gpu->psBondAngleID2;
-    delete gpu->psBondAngleParameter;
-    delete gpu->psDihedralID1;
-    delete gpu->psDihedralID2;
-    delete gpu->psDihedralParameter;
-    delete gpu->psRbDihedralID1;
-    delete gpu->psRbDihedralID2;
-    delete gpu->psRbDihedralParameter1;
-    delete gpu->psRbDihedralParameter2;
-    delete gpu->psLJ14ID;
-    delete gpu->psLJ14Parameter;
-
-    // SHAKE / SETTLE constraint streams.
-    delete gpu->psShakeID;
-    delete gpu->psShakeParameter;
-    delete gpu->psSettleID;
-    delete gpu->psSettleParameter;
-
-    // Exclusion and work-unit streams.
-    delete gpu->psExclusion;
-    delete gpu->psExclusionIndex;
-    delete gpu->psWorkUnit;
-    delete gpu->psInteractingWorkUnit;
-    delete gpu->psInteractionFlag;
-    delete gpu->psInteractionCount;
-
-    // Integrator and random-number streams.
-    delete gpu->psStepSize;
-    delete gpu->psLangevinParameters;
-    delete gpu->psRandom4;
-    delete gpu->psRandom2;
-    delete gpu->psRandomPosition;
-    delete gpu->psRandomSeed;
-    delete gpu->psLinearMomentum;
-    delete gpu->psAtomIndex;
-    delete gpu->psGridBoundingBox;
-    delete gpu->psGridCenter;
-
-    // CCMA constraint streams and their event.
-    delete gpu->psCcmaAtoms;
-    delete gpu->psCcmaDistance;
-    delete gpu->psCcmaAtomConstraints;
-    delete gpu->psCcmaNumAtomConstraints;
-    delete gpu->psCcmaDelta1;
-    delete gpu->psCcmaDelta2;
-    delete gpu->psCcmaReducedMass;
-    cudaEventDestroy(gpu->ccmaEvent);
-    delete gpu->psConstraintMatrixColumn;
-    delete gpu->psConstraintMatrixValue;
-
-    // Tabulated functions.
-    delete gpu->psTabulatedFunctionParams;
-    for (int fn = 0; fn < MAX_TABULATED_FUNCTIONS; fn++)
-        delete gpu->tabulatedFunctions[fn].coefficients;
-
-    if (gpu->compactPlan.valid)
-        destroyCompactionPlan(gpu->compactPlan);
-
-    // Finally release the context object and the CUDA runtime state.
-    delete gpu;
-    cudaThreadExit();
-}
-// Decide how many output buffers are needed, allocate the force, energy and
-// Born streams accordingly, compute the parameter offsets of the bonded terms
-// and rewrite the output-buffer indices stored in the bonded ID streams.
-// Always returns 1.
-extern "C"
-int gpuBuildOutputBuffers(gpuContext gpu)
-{
-    // Start from one nonbonded output buffer per warp. When that is at least
-    // as many as one buffer per block of GRID atoms (small systems), use one
-    // buffer per atom block instead.
-    gpu->sim.nonbondOutputBuffers       = gpu->sim.nonbond_blocks * gpu->sim.nonbond_threads_per_block / GRID;
-    gpu->bOutputBufferPerWarp           = (gpu->sim.nonbondOutputBuffers < gpu->sim.paddedNumberOfAtoms/GRID);
-    if (!gpu->bOutputBufferPerWarp)
-        gpu->sim.nonbondOutputBuffers   = gpu->sim.paddedNumberOfAtoms / GRID;
-
-    // The total buffer count must cover the nonbonded buffers and the largest
-    // per-atom counter recorded in pOutputBufferCounter.
-    if (gpu->sim.nonbondOutputBuffers > gpu->sim.outputBuffers)
-        gpu->sim.outputBuffers = gpu->sim.nonbondOutputBuffers;
-    unsigned int bufferCount = gpu->sim.outputBuffers;
-    for (unsigned int atom = 0; atom < gpu->sim.paddedNumberOfAtoms; atom++)
-    {
-        const unsigned int needed = gpu->pOutputBufferCounter[atom];
-        if (bufferCount < needed)
-            bufferCount = needed;
-    }
-    gpu->sim.outputBuffers      = bufferCount;
-    gpu->sim.energyOutputBuffers = max(gpu->sim.nonbond_threads_per_block, gpu->sim.localForces_threads_per_block)*gpu->sim.blocks;
-
-    // Allocate the output streams and publish their first device pointers.
-    gpu->psForce4               = new CUDAStream<float4>(gpu->sim.paddedNumberOfAtoms, bufferCount, "Force");
-    gpu->psEnergy               = new CUDAStream<float>(gpu->sim.energyOutputBuffers, 1, "Energy");
-    gpu->psBornForce            = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, gpu->sim.nonbondOutputBuffers, "BornForce");
-    gpu->psBornSum              = new CUDAStream<float>(gpu->sim.paddedNumberOfAtoms, gpu->sim.nonbondOutputBuffers, "BornSum");
-    gpu->sim.pForce4            = gpu->psForce4->_pDevStream[0];
-    gpu->sim.pEnergy            = gpu->psEnergy->_pDevStream[0];
-    gpu->sim.pBornForce         = gpu->psBornForce->_pDevStream[0];
-    gpu->sim.pBornSum           = gpu->psBornSum->_pDevStream[0];
-
-    // Running offsets of each bonded term: every offset is the previous one
-    // plus the stride of the next parameter stream.
-    gpu->sim.bond_offset        = gpu->psBondParameter->_stride;
-    gpu->sim.bond_angle_offset  = gpu->sim.bond_offset + gpu->psBondAngleParameter->_stride;
-    gpu->sim.dihedral_offset    = gpu->sim.bond_angle_offset + gpu->psDihedralParameter->_stride;
-    gpu->sim.rb_dihedral_offset = gpu->sim.dihedral_offset + gpu->psRbDihedralParameter1->_stride;
-    gpu->sim.LJ14_offset        = gpu->sim.rb_dihedral_offset + gpu->psLJ14Parameter->_stride;
-
-    // Threads per block for the local forces kernel: work per block rounded up
-    // to a multiple of 16, then clamped to [1, max_localForces_threads_per_block].
-    gpu->sim.localForces_threads_per_block  = (max(gpu->sim.LJ14_offset, gpu->sim.customBonds) / gpu->sim.blocks + 15) & 0xfffffff0;
-    if (gpu->sim.localForces_threads_per_block > gpu->sim.max_localForces_threads_per_block)
-        gpu->sim.localForces_threads_per_block = gpu->sim.max_localForces_threads_per_block;
-    if (gpu->sim.localForces_threads_per_block < 1)
-        gpu->sim.localForces_threads_per_block = 1;
-
-    // Mirror every stored output-buffer index: index b becomes (last - b).
-    const int last = bufferCount - 1;
-    for (int b = 0; b < (int) gpu->sim.bonds; b++)
-    {
-        (*gpu->psBondID)[b].z = last - (*gpu->psBondID)[b].z;
-        (*gpu->psBondID)[b].w = last - (*gpu->psBondID)[b].w;
-    }
-    for (int a = 0; a < (int) gpu->sim.bond_angles; a++)
-    {
-        (*gpu->psBondAngleID1)[a].w = last - (*gpu->psBondAngleID1)[a].w;
-        (*gpu->psBondAngleID2)[a].x = last - (*gpu->psBondAngleID2)[a].x;
-        (*gpu->psBondAngleID2)[a].y = last - (*gpu->psBondAngleID2)[a].y;
-    }
-    for (int d = 0; d < (int) gpu->sim.dihedrals; d++)
-    {
-        (*gpu->psDihedralID2)[d].x = last - (*gpu->psDihedralID2)[d].x;
-        (*gpu->psDihedralID2)[d].y = last - (*gpu->psDihedralID2)[d].y;
-        (*gpu->psDihedralID2)[d].z = last - (*gpu->psDihedralID2)[d].z;
-        (*gpu->psDihedralID2)[d].w = last - (*gpu->psDihedralID2)[d].w;
-    }
-    for (int r = 0; r < (int) gpu->sim.rb_dihedrals; r++)
-    {
-        (*gpu->psRbDihedralID2)[r].x = last - (*gpu->psRbDihedralID2)[r].x;
-        (*gpu->psRbDihedralID2)[r].y = last - (*gpu->psRbDihedralID2)[r].y;
-        (*gpu->psRbDihedralID2)[r].z = last - (*gpu->psRbDihedralID2)[r].z;
-        (*gpu->psRbDihedralID2)[r].w = last - (*gpu->psRbDihedralID2)[r].w;
-    }
-    for (int p = 0; p < (int) gpu->sim.LJ14s; p++)
-    {
-        (*gpu->psLJ14ID)[p].z = last - (*gpu->psLJ14ID)[p].z;
-        (*gpu->psLJ14ID)[p].w = last - (*gpu->psLJ14ID)[p].w;
-    }
-
-    // Push the rewritten ID streams.
-    gpu->psBondID->Upload();
-    gpu->psBondAngleID1->Upload();
-    gpu->psBondAngleID2->Upload();
-    gpu->psDihedralID2->Upload();
-    gpu->psRbDihedralID2->Upload();
-    gpu->psLJ14ID->Upload();
-    return 1;
-}
-// Allocate the work-unit, interaction and grid streams, choose the per-block
-// work distribution for the nonbonded and bornForce2 kernels, and fill the
-// work list with one entry per tile of the upper triangle (including the
-// diagonal) of the atom-block grid. Returns the number of tiles.
-extern "C"
-int gpuBuildThreadBlockWorkList(gpuContext gpu)
-{
-    const unsigned int paddedAtoms = gpu->sim.paddedNumberOfAtoms;
-    const unsigned int tileSize = gpu->grid;
-    const unsigned int tilesPerSide = (paddedAtoms + (tileSize - 1)) / tileSize;
-    const unsigned int tileCount = tilesPerSide * (tilesPerSide + 1) / 2;
-
-    // One entry per tile for the three work-unit streams.
-    CUDAStream<unsigned int>* workUnitStream = new CUDAStream<unsigned int>(tileCount, 1u, "WorkUnit");
-    unsigned int* workList = workUnitStream->_pSysData;
-    gpu->psWorkUnit = workUnitStream;
-    gpu->sim.pWorkUnit = workUnitStream->_pDevStream[0];
-    CUDAStream<unsigned int>* interactingStream = new CUDAStream<unsigned int>(tileCount, 1u, "InteractingWorkUnit");
-    gpu->psInteractingWorkUnit = interactingStream;
-    gpu->sim.pInteractingWorkUnit = interactingStream->_pDevStream[0];
-    CUDAStream<unsigned int>* flagStream = new CUDAStream<unsigned int>(tileCount, 1u, "InteractionFlag");
-    gpu->psInteractionFlag = flagStream;
-    gpu->sim.pInteractionFlag = flagStream->_pDevStream[0];
-    CUDAStream<size_t>* countStream = new CUDAStream<size_t>(1, 1u, "InteractionCount");
-    gpu->psInteractionCount = countStream;
-    gpu->sim.pInteractionCount = countStream->_pDevStream[0];
-
-    // One entry per atom block for the bounding box and centre streams.
-    CUDAStream<float4>* boundingBoxStream = new CUDAStream<float4>(tilesPerSide, 1u, "GridBoundingBox");
-    gpu->psGridBoundingBox = boundingBoxStream;
-    gpu->sim.pGridBoundingBox = boundingBoxStream->_pDevStream[0];
-    CUDAStream<float4>* centerStream = new CUDAStream<float4>(tilesPerSide, 1u, "GridCenter");
-    gpu->psGridCenter = centerStream;
-    gpu->sim.pGridCenter = centerStream->_pDevStream[0];
-
-    gpu->sim.nonbond_workBlock      = gpu->sim.nonbond_threads_per_block / GRID;
-    gpu->sim.bornForce2_workBlock   = gpu->sim.bornForce2_threads_per_block / GRID;
-    gpu->sim.workUnits = tileCount;
-    // Initialize the plan for doing stream compaction.
-    planCompaction(gpu->compactPlan);
-    // Increase block count if necessary for extra large molecules that would
-    // otherwise overflow the SM workunit buffers
-//    int minimumBlocks = (cells + gpu->sim.workUnitsPerSM - 1) / gpu->sim.workUnitsPerSM;
-//    if ((int) gpu->sim.nonbond_blocks < minimumBlocks)
-//    {
-//        gpu->sim.nonbond_blocks = gpu->sim.nonbond_blocks * ((minimumBlocks + gpu->sim.nonbond_blocks - 1) / gpu->sim.nonbond_blocks);
-//    }
-//    if ((int) gpu->sim.bornForce2_blocks < minimumBlocks)
-//    {
-//        gpu->sim.bornForce2_blocks = gpu->sim.bornForce2_blocks * ((minimumBlocks + gpu->sim.bornForce2_blocks - 1) / gpu->sim.bornForce2_blocks);
-//    }
-
-    // Even split of the tiles over the blocks of each kernel, plus the leftover.
-    gpu->sim.nbWorkUnitsPerBlock            = tileCount / gpu->sim.nonbond_blocks;
-    gpu->sim.nbWorkUnitsPerBlockRemainder   = tileCount - gpu->sim.nonbond_blocks * gpu->sim.nbWorkUnitsPerBlock;
-    gpu->sim.bf2WorkUnitsPerBlock           = tileCount / gpu->sim.bornForce2_blocks;
-    gpu->sim.bf2WorkUnitsPerBlockRemainder  = tileCount - gpu->sim.bornForce2_blocks * gpu->sim.bf2WorkUnitsPerBlock;
-
-    // Interaction kernel: 64 threads per block, at most 8 blocks per gpu->sim.blocks.
-    gpu->sim.interaction_threads_per_block = 64;
-    gpu->sim.interaction_blocks = (gpu->sim.workUnits + gpu->sim.interaction_threads_per_block - 1) / gpu->sim.interaction_threads_per_block;
-    if (gpu->sim.interaction_blocks > 8*gpu->sim.blocks)
-        gpu->sim.interaction_blocks = 8*gpu->sim.blocks;
-
-    // Decrease thread count for extra small molecules to spread computation
-    // across entire chip
-    int concurrentUnits = gpu->sim.nonbond_blocks * gpu->sim.nonbond_workBlock;
-    if (concurrentUnits > (int) tileCount)
-    {
-        int unitsPerBlock                       = (tileCount + gpu->sim.nonbond_blocks - 1) / gpu->sim.nonbond_blocks;
-        gpu->sim.nonbond_threads_per_block      = unitsPerBlock * GRID;
-        gpu->sim.nonbond_workBlock              = unitsPerBlock;
-    }
-    concurrentUnits = gpu->sim.bornForce2_blocks * gpu->sim.bornForce2_workBlock;
-    if (concurrentUnits > (int) tileCount)
-    {
-        int unitsPerBlock                       = (tileCount + gpu->sim.bornForce2_blocks - 1) / gpu->sim.bornForce2_blocks;
-        gpu->sim.bornForce2_threads_per_block   = unitsPerBlock * GRID;
-        gpu->sim.bornForce2_workBlock           = unitsPerBlock;
-    }
-
-    // Encode each tile as (column << 17) | (row << 2) with column >= row; the
-    // two low bits are left clear here.
-    unsigned int next = 0;
-    for (unsigned int row = 0; row < tilesPerSide; row++)
-        for (unsigned int column = row; column < tilesPerSide; column++)
-            workList[next++] = (column << 17) | (row << 2);
-
-    (*gpu->psInteractionCount)[0] = gpu->sim.workUnits;
-    gpu->psInteractionCount->Upload();
-    workUnitStream->Upload();
-    gpuSetConstants(gpu);
-    return tileCount;
-}
-// Build the per-tile exclusion masks.
-//
-// Steps:
-//   1. Set bit 0 of every work unit (tile) that contains an excluded pair or
-//      touches the block holding padding atoms.
-//   2. Give each flagged tile a slot of `grid` mask words in the Exclusion
-//      stream and record the slot's start in the ExclusionIndex stream.
-//   3. Start every mask word as all ones and clear one bit per excluded pair
-//      and per pair that involves a padding atom.
-//
-// Mask bits are built with unsigned shifts: with a 32-wide tile the offset can
-// be 31, and shifting a signed 1 that far overflows int.
-extern "C"
-void gpuBuildExclusionList(gpuContext gpu)
-{
-    const unsigned int atoms = gpu->sim.paddedNumberOfAtoms;
-    const unsigned int grid = gpu->grid;
-    const unsigned int dim = atoms/grid;
-    unsigned int* pWorkList = gpu->psWorkUnit->_pSysData;
-    // Mark which work units have exclusions.
-    for (int atom1 = 0; atom1 < (int)gpu->exclusions.size(); ++atom1)
-    {
-        int x = atom1/grid;
-        for (int j = 0; j < (int)gpu->exclusions[atom1].size(); ++j)
-        {
-            int atom2 = gpu->exclusions[atom1][j];
-            int y = atom2/grid;
-            // Index of the tile in the row-major upper-triangle work list.
-            int cell = (x > y ? x+y*dim-y*(y+1)/2 : y+x*dim-x*(x+1)/2);
-            pWorkList[cell] |= 1;
-        }
-    }
-    // When padding atoms exist, every tile touching the block that contains
-    // atom index natoms is flagged as well.
-    if ((int)gpu->sim.paddedNumberOfAtoms > gpu->natoms)
-    {
-        int lastBlock = gpu->natoms/grid;
-        for (int i = 0; i < (int)gpu->sim.workUnits; ++i)
-        {
-            int x = pWorkList[i]>>17;
-            int y = (pWorkList[i]>>2)&0x7FFF;
-            if (x == lastBlock || y == lastBlock)
-                pWorkList[i] |= 1;
-        }
-    }
-    // Build a list of indexes for the work units with exclusions.
-    CUDAStream<unsigned int>* psExclusionIndex = new CUDAStream<unsigned int>(gpu->sim.workUnits, 1u, "ExclusionIndex");
-    gpu->psExclusionIndex = psExclusionIndex;
-    unsigned int* pExclusionIndex = psExclusionIndex->_pSysData;
-    gpu->sim.pExclusionIndex = psExclusionIndex->_pDevData;
-    int numWithExclusions = 0;
-    for (int i = 0; i < (int)psExclusionIndex->_length; ++i)
-        if ((pWorkList[i]&1) == 1)
-            pExclusionIndex[i] = (numWithExclusions++)*grid;
-    // Record the exclusion data.
-    CUDAStream<unsigned int>* psExclusion = new CUDAStream<unsigned int>(numWithExclusions*grid, 1u, "Exclusion");
-    gpu->psExclusion = psExclusion;
-    unsigned int* pExclusion = psExclusion->_pSysData;
-    gpu->sim.pExclusion = psExclusion->_pDevData;
-    // All bits set means nothing is excluded yet.
-    for (int i = 0; i < (int)psExclusion->_length; ++i)
-        pExclusion[i] = 0xFFFFFFFF;
-    for (int atom1 = 0; atom1 < (int)gpu->exclusions.size(); ++atom1)
-    {
-        int x = atom1/grid;
-        int offset1 = atom1-x*grid;
-        for (int j = 0; j < (int)gpu->exclusions[atom1].size(); ++j)
-        {
-            int atom2 = gpu->exclusions[atom1][j];
-            int y = atom2/grid;
-            int offset2 = atom2-y*grid;
-            // The word is selected by the atom in the higher-numbered block,
-            // the bit by the atom in the lower-numbered block.
-            if (x > y)
-            {
-                int cell = x+y*dim-y*(y+1)/2;
-                pExclusion[pExclusionIndex[cell]+offset1] &= ~(1u<<offset2);
-            }
-            else
-            {
-                int cell = y+x*dim-x*(x+1)/2;
-                pExclusion[pExclusionIndex[cell]+offset2] &= ~(1u<<offset1);
-            }
-        }
-    }
-    // Mark all interactions that involve a padding atom as being excluded.
-    for (int atom1 = gpu->natoms; atom1 < (int)atoms; ++atom1)
-    {
-        int x = atom1/grid;
-        int offset1 = atom1-x*grid;
-        for (int atom2 = 0; atom2 < (int)atoms; ++atom2)
-        {
-            int y = atom2/grid;
-            int offset2 = atom2-y*grid;
-            // Both branches run when x == y, so a diagonal tile gets the bit
-            // cleared in both orientations.
-            if (x >= y)
-            {
-                int cell = x+y*dim-y*(y+1)/2;
-                pExclusion[pExclusionIndex[cell]+offset1] &= ~(1u<<offset2);
-            }
-            if (y >= x)
-            {
-                int cell = y+x*dim-x*(x+1)/2;
-                pExclusion[pExclusionIndex[cell]+offset2] &= ~(1u<<offset1);
-            }
-        }
-    }
-    psExclusion->Upload();
-    psExclusionIndex->Upload();
-    gpu->psWorkUnit->Upload();
-    gpuSetConstants(gpu);
-}
-// Calls the Set...Sim routine of every kernel module with this context, in a
-// fixed order. Always returns 1.
-// NOTE(review): each routine presumably copies gpu->sim into that module's
-// device-side constants - confirm against the kernel sources.
-extern "C"
-int gpuSetConstants(gpuContext gpu)
-{
-    // Nonbonded and custom force modules.
-    SetCalculateCDLJForcesSim(gpu);
-    SetCalculateCDLJObcGbsaForces1Sim(gpu);
-    SetCalculateCustomNonbondedForcesSim(gpu);
-    SetCalculateCustomBondForcesSim(gpu);
-    SetCalculateCustomAngleForcesSim(gpu);
-    SetCalculateCustomTorsionForcesSim(gpu);
-    SetCalculateCustomExternalForcesSim(gpu);
-    SetCalculateLocalForcesSim(gpu);
-    // OBC / GB/VI Born sum and second-stage force modules.
-    SetCalculateObcGbsaBornSumSim(gpu);
-    SetCalculateGBVIBornSumSim(gpu);
-    SetCalculateObcGbsaForces2Sim(gpu);
-    SetCalculateGBVIForces2Sim(gpu);
-    // Thermostat, PME and force modules.
-    SetCalculateAndersenThermostatSim(gpu);
-    SetCalculatePMESim(gpu);
-    SetForcesSim(gpu);
-    // Constraint, integrator update and random-number modules.
-    SetShakeHSim(gpu);
-    SetLangevinUpdateSim(gpu);
-    SetVerletUpdateSim(gpu);
-    SetBrownianUpdateSim(gpu);
-    SetSettleSim(gpu);
-    SetCCMASim(gpu);
-    SetRandomSim(gpu);
-    return 1;
-}
-// Tag `atom`, and every atom reachable from it through not-yet-tagged atoms
-// (entries equal to -1 in atomMolecule), as belonging to `molecule`.
-//
-//   atom         - index of the starting atom; it is tagged unconditionally
-//   molecule     - molecule index to write into atomMolecule
-//   atomMolecule - per-atom molecule index, -1 meaning "not tagged yet"
-//   atomBonds    - per-atom list of connected atom indices
-//
-// The traversal uses an explicit work stack rather than recursion, so the
-// depth of the call stack no longer grows with the size of the molecule.
-static void tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMolecule, vector<vector<int> >& atomBonds)
-{
-    vector<int> pending(1, atom);
-    atomMolecule[atom] = molecule;
-    while (!pending.empty())
-    {
-        const int current = pending.back();
-        pending.pop_back();
-        const vector<int>& neighbors = atomBonds[current];
-        for (int i = 0; i < (int)neighbors.size(); i++)
-        {
-            const int neighbor = neighbors[i];
-            if (atomMolecule[neighbor] == -1)
-            {
-                // Tag before pushing so an atom is never queued twice.
-                atomMolecule[neighbor] = molecule;
-                pending.push_back(neighbor);
-            }
-        }
-    }
-}
-// Partition the atoms into molecules (connected through force particle groups
-// and constraints), group identical molecules together and store the result
-// in gpu->moleculeGroups: for each group, the first atom index of every
-// instance plus the atom offsets relative to that first atom.
-static void findMoleculeGroups(gpuContext gpu)
-{
-    // Collect every SHAKE, SETTLE and CCMA constraint into one list.
-    vector<Constraint> constraints;
-    for (int s = 0; s < (int)gpu->sim.ShakeConstraints; s++)
-    {
-        // x is paired with y always, and with z / w unless they are -1.
-        const int center = (*gpu->psShakeID)[s].x;
-        const int second = (*gpu->psShakeID)[s].y;
-        const int third = (*gpu->psShakeID)[s].z;
-        const int fourth = (*gpu->psShakeID)[s].w;
-        const float distance2 = (*gpu->psShakeParameter)[s].z;
-        constraints.push_back(Constraint(center, second, distance2));
-        if (third != -1)
-            constraints.push_back(Constraint(center, third, distance2));
-        if (fourth != -1)
-            constraints.push_back(Constraint(center, fourth, distance2));
-    }
-    for (int s = 0; s < (int)gpu->sim.settleConstraints; s++)
-    {
-        // Three pairwise constraints per SETTLE entry, stored as squared distances.
-        const int first = (*gpu->psSettleID)[s].x;
-        const int second = (*gpu->psSettleID)[s].y;
-        const int third = (*gpu->psSettleID)[s].z;
-        const float distance12 = (*gpu->psSettleParameter)[s].x;
-        const float distance23 = (*gpu->psSettleParameter)[s].y;
-        constraints.push_back(Constraint(first, second, distance12*distance12));
-        constraints.push_back(Constraint(first, third, distance12*distance12));
-        constraints.push_back(Constraint(second, third, distance23*distance23));
-    }
-    for (int c = 0; c < (int)gpu->sim.ccmaConstraints; c++)
-    {
-        const int first = (*gpu->psCcmaAtoms)[c].x;
-        const int second = (*gpu->psCcmaAtoms)[c].y;
-        const float distance2 = (*gpu->psCcmaDistance)[c].w;
-        constraints.push_back(Constraint(first, second, distance2));
-    }
-
-    // Adjacency lists: atoms sharing a force particle group or a constraint
-    // are treated as connected.
-    const int numAtoms = gpu->natoms;
-    const int numForces = (int) gpu->forces.size();
-    const int numConstraints = (int)constraints.size();
-    vector<vector<int> > atomBonds(numAtoms);
-    for (int f = 0; f < numForces; f++)
-    {
-        for (int g = 0; g < gpu->forces[f]->getNumParticleGroups(); g++)
-        {
-            vector<int> particles;
-            gpu->forces[f]->getParticlesInGroup(g, particles);
-            for (int a = 0; a < (int) particles.size(); a++)
-                for (int b = 0; b < (int) particles.size(); b++)
-                    if (a != b)
-                        atomBonds[particles[a]].push_back(particles[b]);
-        }
-    }
-    for (int c = 0; c < numConstraints; c++)
-    {
-        const int first = constraints[c].atom1;
-        const int second = constraints[c].atom2;
-        atomBonds[first].push_back(second);
-        atomBonds[second].push_back(first);
-    }
-
-    // Assign a molecule index to every atom and list the atoms of each molecule.
-    vector<int> atomMolecule(numAtoms, -1);
-    int numMolecules = 0;
-    for (int atom = 0; atom < numAtoms; atom++)
-    {
-        if (atomMolecule[atom] == -1)
-            tagAtomsInMolecule(atom, numMolecules++, atomMolecule, atomBonds);
-    }
-    vector<vector<int> > atomIndices(numMolecules);
-    for (int atom = 0; atom < numAtoms; atom++)
-        atomIndices[atomMolecule[atom]].push_back(atom);
-
-    // Describe each molecule: its atoms, its particle groups per force and
-    // its constraints. A group or constraint is attributed to the molecule of
-    // its first atom.
-    vector<Molecule> molecules(numMolecules);
-    for (int m = 0; m < numMolecules; m++)
-    {
-        molecules[m].atoms = atomIndices[m];
-        molecules[m].groups.resize(gpu->forces.size());
-    }
-    for (int f = 0; f < numForces; f++)
-    {
-        for (int g = 0; g < gpu->forces[f]->getNumParticleGroups(); g++)
-        {
-            vector<int> particles;
-            gpu->forces[f]->getParticlesInGroup(g, particles);
-            molecules[atomMolecule[particles[0]]].groups[f].push_back(g);
-        }
-    }
-    for (int c = 0; c < numConstraints; c++)
-        molecules[atomMolecule[constraints[c].atom1]].constraints.push_back(c);
-
-    // Sort the molecules into groups of identical molecules.
-    vector<Molecule> uniqueMolecules;
-    vector<vector<int> > moleculeInstances;
-    float4* velm = gpu->psVelm4->_pSysData;
-    for (int molIndex = 0; molIndex < (int)molecules.size(); molIndex++)
-    {
-        Molecule& candidate = molecules[molIndex];
-        bool isNew = true;
-        for (int u = 0; u < (int)uniqueMolecules.size() && isNew; u++)
-        {
-            Molecule& reference = uniqueMolecules[u];
-            bool identical = (candidate.atoms.size() == reference.atoms.size() && candidate.constraints.size() == reference.constraints.size());
-            // Atoms must sit at the same relative indices, have equal velm.w
-            // and be identical according to every force.
-            const int atomOffset = reference.atoms[0]-candidate.atoms[0];
-            for (int a = 0; a < (int) candidate.atoms.size() && identical; a++)
-            {
-                const int candidateAtom = candidate.atoms[a];
-                const int referenceAtom = reference.atoms[a];
-                if (candidateAtom != referenceAtom-atomOffset || velm[candidateAtom].w != velm[referenceAtom].w)
-                    identical = false;
-                for (int f = 0; f < numForces; f++)
-                    if (!gpu->forces[f]->areParticlesIdentical(candidateAtom, referenceAtom))
-                        identical = false;
-            }
-            // Constraints must join the same relative atoms at the same distance.
-            for (int c = 0; c < (int) candidate.constraints.size() && identical; c++)
-            {
-                const Constraint& mine = constraints[candidate.constraints[c]];
-                const Constraint& theirs = constraints[reference.constraints[c]];
-                if (mine.atom1 != theirs.atom1-atomOffset ||
-                        mine.atom2 != theirs.atom2-atomOffset ||
-                        mine.distance2 != theirs.distance2)
-                    identical = false;
-            }
-            // Particle groups must match pairwise for every force.
-            for (int f = 0; f < numForces && identical; f++)
-            {
-                if (candidate.groups[f].size() != reference.groups[f].size())
-                    identical = false;
-                for (int g = 0; g < (int) candidate.groups[f].size() && identical; g++)
-                    if (!gpu->forces[f]->areGroupsIdentical(candidate.groups[f][g], reference.groups[f][g]))
-                        identical = false;
-            }
-            if (identical)
-            {
-                moleculeInstances[u].push_back(candidate.atoms[0]);
-                isNew = false;
-            }
-        }
-        if (isNew)
-        {
-            // First molecule of its kind: it becomes the reference for a new group.
-            uniqueMolecules.push_back(candidate);
-            moleculeInstances.push_back(vector<int>(1, candidate.atoms[0]));
-        }
-    }
-
-    // Publish the groups; atom indices are stored relative to the first atom.
-    gpu->moleculeGroups.resize(moleculeInstances.size());
-    for (int g = 0; g < (int)moleculeInstances.size(); g++)
-    {
-        gpuMoleculeGroup& group = gpu->moleculeGroups[g];
-        group.instances = moleculeInstances[g];
-        const vector<int>& atoms = uniqueMolecules[g].atoms;
-        group.atoms.resize(atoms.size());
-        for (int a = 0; a < (int)atoms.size(); a++)
-            group.atoms[a] = atoms[a]-atoms[0];
-    }
-}
-// Reorder atoms so that molecules close together in space are close together
-// in the atom arrays. Within each group of identical molecules the instances
-// are sorted by a spatial bin index and their positions, velocities, original
-// atom indices and cell offsets are permuted to match. Does nothing when
-// there are no atoms or nonbondedCutoffSqr is zero.
-extern "C"
-void gpuReorderAtoms(gpuContext gpu)
-{
-    if (gpu->natoms == 0 || gpu->sim.nonbondedCutoffSqr == 0.0)
-        return;
-    if (gpu->moleculeGroups.empty())
-        findMoleculeGroups(gpu);
-    const bool periodic = (gpu->sim.nonbondedMethod == PERIODIC || gpu->sim.nonbondedMethod == EWALD || gpu->sim.nonbondedMethod == PARTICLE_MESH_EWALD);
-
-    // Fetch the current positions and velocities.
-    const int numAtoms = gpu->natoms;
-    gpu->psPosq4->Download();
-    gpu->psVelm4->Download();
-    float4* posq = gpu->psPosq4->_pSysData;
-    float4* velm = gpu->psVelm4->_pSysData;
-
-    // Extent used for binning: the periodic box, or the bounding box of the atoms.
-    float minx, maxx, miny, maxy, minz, maxz;
-    if (periodic)
-    {
-        minx = miny = minz = 0.0;
-        maxx = gpu->sim.periodicBoxSizeX;
-        maxy = gpu->sim.periodicBoxSizeY;
-        maxz = gpu->sim.periodicBoxSizeZ;
-    }
-    else
-    {
-        minx = maxx = posq[0].x;
-        miny = maxy = posq[0].y;
-        minz = maxz = posq[0].z;
-        for (int a = 1; a < numAtoms; a++)
-        {
-            minx = min(minx, posq[a].x);
-            maxx = max(maxx, posq[a].x);
-            miny = min(miny, posq[a].y);
-            maxy = max(maxy, posq[a].y);
-            minz = min(minz, posq[a].z);
-            maxz = max(maxz, posq[a].z);
-        }
-    }
-
-    // Scratch copies filled in the new order, one group at a time.
-    vector<int> originalIndex(numAtoms);
-    vector<float4> newPosq(numAtoms);
-    vector<float4> newVelm(numAtoms);
-    vector<int3> newCellOffsets(numAtoms);
-    for (int g = 0; g < (int)gpu->moleculeGroups.size(); g++)
-    {
-        gpuMoleculeGroup& mol = gpu->moleculeGroups[g];
-        const int numMolecules = mol.instances.size();
-        vector<int>& atoms = mol.atoms;
-
-        // Average atom position of every instance.
-        vector<float3> molPos(numMolecules);
-        for (int m = 0; m < numMolecules; m++)
-        {
-            float3& center = molPos[m];
-            center.x = 0.0f;
-            center.y = 0.0f;
-            center.z = 0.0f;
-            for (int k = 0; k < (int)atoms.size(); k++)
-            {
-                const int atom = atoms[k]+mol.instances[m];
-                center.x += posq[atom].x;
-                center.y += posq[atom].y;
-                center.z += posq[atom].z;
-            }
-            center.x /= atoms.size();
-            center.y /= atoms.size();
-            center.z /= atoms.size();
-        }
-        if (periodic)
-        {
-            // Move each molecule position into the same box, shifting its
-            // atoms and cell offsets by whole box lengths.
-            for (int m = 0; m < numMolecules; m++)
-            {
-                const int xcell = (int) floor(molPos[m].x/gpu->sim.periodicBoxSizeX);
-                const int ycell = (int) floor(molPos[m].y/gpu->sim.periodicBoxSizeY);
-                const int zcell = (int) floor(molPos[m].z/gpu->sim.periodicBoxSizeZ);
-                const float dx = xcell*gpu->sim.periodicBoxSizeX;
-                const float dy = ycell*gpu->sim.periodicBoxSizeY;
-                const float dz = zcell*gpu->sim.periodicBoxSizeZ;
-                if (dx == 0.0f && dy == 0.0f && dz == 0.0f)
-                    continue;
-                molPos[m].x -= dx;
-                molPos[m].y -= dy;
-                molPos[m].z -= dz;
-                for (int k = 0; k < (int)atoms.size(); k++)
-                {
-                    const int atom = atoms[k]+mol.instances[m];
-                    posq[atom].x -= dx;
-                    posq[atom].y -= dy;
-                    posq[atom].z -= dz;
-                    gpu->posCellOffsets[atom].x -= xcell;
-                    gpu->posCellOffsets[atom].y -= ycell;
-                    gpu->posCellOffsets[atom].z -= zcell;
-                }
-            }
-        }
-
-        // Select a bin for each molecule, then sort them by bin.
-        // For small systems, a simple zigzag curve works better than a Hilbert curve.
-        const bool useHilbert = (numMolecules > 5000 || atoms.size() > 8);
-        float binWidth;
-        if (useHilbert)
-            binWidth = (float)(max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
-        else
-            binWidth = (float)(0.2*sqrt(gpu->sim.nonbondedCutoffSqr));
-        const int xbins = 1 + (int) ((maxx-minx)/binWidth);
-        const int ybins = 1 + (int) ((maxy-miny)/binWidth);
-        vector<pair<int, int> > molBins(numMolecules);
-        bitmask_t coords[3];
-        for (int m = 0; m < numMolecules; m++)
-        {
-            const int x = (int) ((molPos[m].x-minx)/binWidth);
-            const int y = (int) ((molPos[m].y-miny)/binWidth);
-            const int z = (int) ((molPos[m].z-minz)/binWidth);
-            int bin;
-            if (useHilbert)
-            {
-                coords[0] = x;
-                coords[1] = y;
-                coords[2] = z;
-                bin = (int) hilbert_c2i(3, 8, coords);
-            }
-            else
-            {
-                // Zigzag: the y direction flips on odd z, the x direction on odd y.
-                const int yodd = y&1;
-                const int zodd = z&1;
-                bin = z*xbins*ybins;
-                bin += (zodd ? ybins-y : y)*xbins;
-                bin += (yodd ? xbins-x : x);
-            }
-            molBins[m] = pair<int, int>(bin, m);
-        }
-        sort(molBins.begin(), molBins.end());
-
-        // Reorder the atoms: the m-th instance slot receives the data of the
-        // instance that sorted into position m.
-        for (int m = 0; m < numMolecules; m++)
-        {
-            const int sourceBase = mol.instances[molBins[m].second];
-            const int targetBase = mol.instances[m];
-            for (int k = 0; k < (int)atoms.size(); k++)
-            {
-                const int oldIndex = sourceBase+atoms[k];
-                const int newIndex = targetBase+atoms[k];
-                originalIndex[newIndex] = (*gpu->psAtomIndex)[oldIndex];
-                newPosq[newIndex] = posq[oldIndex];
-                newVelm[newIndex] = velm[oldIndex];
-                newCellOffsets[newIndex] = gpu->posCellOffsets[oldIndex];
-            }
-        }
-    }
-
-    // Update the streams.
-    for (int a = 0; a < numAtoms; a++)
-    {
-        posq[a] = newPosq[a];
-        velm[a] = newVelm[a];
-        (*gpu->psAtomIndex)[a] = originalIndex[a];
-        gpu->posCellOffsets[a] = newCellOffsets[a];
-    }
-    gpu->psPosq4->Upload();
-    gpu->psVelm4->Upload();
-    gpu->psAtomIndex->Upload();
-}

--- a/platforms/cuda/src/kernels/gputypes.h
+++ b/platforms/cuda/src/kernels/gputypes.h
-#ifndef __GPUTYPES_H__
+Vim: Warning: Output is not to a terminal
-#define __GPUTYPES_H__
+[?1049h[?1h=[1;59r[?12;25h[?12l[?25h[27m[m[H[2J[?25l[59;1H"svn-commit.tmp" 15L, 601C[1;1H[33m  1 
+  2 [m[32m--This line, and those below, will be ignored--[m
-/* -------------------------------------------------------------------------- *
+[33m  3 
- *                                   OpenMM                                   *
+  4 [m[1m[35mM    plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp[m
- * -------------------------------------------------------------------------- *
+[33m  5 [m[1m[35mM    plugins/freeEnergy/platforms/reference/src/gbsa/CpuGBVISoftcore.cpp[m
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+[33m  6 [m[1m[35mM    openmmapi/include/openmm/GBVIForce.h[m
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+[33m  7 [m[1m[35mM    openmmapi/src/GBVIForce.cpp[m
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+[33m  8 [m[1m[35mM    olla/src/Platform.cpp[m
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+[33m  9 [m[1m[35mM    platforms/opencl/src/OpenCLContext.h[m
- *                                                                            *
+[33m 10 [m[1m[35mM    platforms/cuda/src/CudaKernels.cpp[m
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+[33m 11 [m[1m[35mM    platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu[m
- * Authors: Scott Le Grand, Peter Eastman                                     *
+[33m 12 [m[1m[35mM    platforms/cuda/src/kernels/gputypes.h[m
- * Contributors:                                                              *
+[33m 13 [m[1m[35mM    platforms/cuda/src/kernels/cudatypes.h[m
- *                                                                            *
+[33m 14 [m[1m[35mM    platforms/cuda/src/kernels/kForces.cu[m
- * This program is free software: you can redistribute it and/or modify       *
+[33m 15 [m[1m[35mM    platforms/cuda/src/kernels/gpu.cpp[m
- * it under the terms of the GNU Lesser General Public License as published   *
+[1m[34m~                                                                                                                                                                                                                                         [17;1H~                                                                                                                                                                                                                                         [18;1H~                                                                                                                                                                                                                                         [19;1H~                                                                                                                                                                                                                                         [20;1H~                                                                                                                                                                                                                                         [21;1H~                                                                                                                                                                                                                                         [22;1H~                                                                                                                                                                                                                                         [23;1H~                                                                                                                                                                                                                                         [24;1H~                                                                       
                                                                                                                                                                  [25;1H~                                                                                                                                                                                                                                         [26;1H~                                                                                                                                                                                                                                         [27;1H~                                                                                                                                                                                                                                         [28;1H~                                                                                                                                                                                                                                         [29;1H~                                                                                                                                                                                                                                         [30;1H~                                                                                                                                                                                                                                         [31;1H~                                                                                                                                                                                                                                         [32;1H~                                                                                                                                                       
                                                                                  [33;1H~                                                                                                                                                                                                                                         [34;1H~                                                                                                                                                                                                                                         [35;1H~                                                                                                                                                                                                                                         [36;1H~                                                                                                                                                                                                                                         [37;1H~                                                                                                                                                                                                                                         [38;1H~                                                                                                                                                                                                                                         [39;1H~                                                                                                                                                                                                                                         [40;1H~                                                                                                                                                                                                                                       
  [41;1H~                                                                                                                                                                                                                                         [42;1H~                                                                                                                                                                                                                                         [43;1H~                                                                                                                                                                                                                                         [44;1H~                                                                                                                                                                                                                                         [45;1H~                                                                                                                                                                                                                                         [46;1H~                                                                                                                                                                                                                                         [47;1H~                                                                                                                                                                                                                                         [48;1H~                                                                                                                                                                                                                                         [49;1H~                                                                       
                                                                                                                                                                  [50;1H~                                                                                                                                                                                                                                         [51;1H~                                                                                                                                                                                                                                         [52;1H~                                                                                                                                                                                                                                         [53;1H~                                                                                                                                                                                                                                         [54;1H~                                                                                                                                                                                                                                         [55;1H~                                                                                                                                                                                                                                         [56;1H~                                                                                                                                                                                                                                         [57;1H~                                                                                                                                                       
                                                                                  [58;1H~                                                                                                                                                                                                                                         [m[59;217H1,0-1[9CAll[1;5H[?12l[?25h[?25l[59;1H[K[59;1H:[?12l[?25hq![?25l[59;1H[K[59;1H[?1l>[?12l[?25h[?1049l
- * by the Free Software Foundation, either version 3 of the License, or       *
+Log message unchanged or not specified
- * (at your option) any later version.                                        *
+a)bort, c)ontinue, e)dit
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include "cudatypes.h"
-#include "cudaCompact.h"
-#include <vector>
-#include "windowsExportCuda.h"
-namespace OpenMM {
-    class CudaForceInfo;
-}
-struct gpuAtomType {
-    std::string name;
-    char symbol;
-    float r;
-};
-struct gpuMoleculeGroup {
-    std::vector<int> atoms;
-    std::vector<int> instances;
-};
-struct gpuTabulatedFunction {
-    gpuTabulatedFunction() : coefficients(NULL) {
-    }
-    std::string name;
-    double min, max;
-    CUDAStream<float4>* coefficients;
-};
-enum SM_VERSION
-{
-    SM_10,
-    SM_11,
-    SM_12,
-    SM_20
-};
-/* Pointer to this structure will be given 
- * to gromacs functions*/
-struct _gpuContext {
-    //Cache this here so that it doesn't
-    //have to be repeatedly passed around
-    int natoms;
-    int device;
-    bool useBlockingSync;
-    gpuAtomType* gpAtomTable;
-    int gAtomTypes;
-    unsigned int blocksPerSM;
-    unsigned int sharedMemoryPerBlock;
-    cudaGmxSimulation sim;
-    unsigned int* pOutputBufferCounter;
-    std::vector<OpenMM::CudaForceInfo*> forces;
-    std::vector<std::vector<int> > exclusions;
-    unsigned char* pAtomSymbol;
-    std::vector<gpuMoleculeGroup> moleculeGroups;
-    gpuTabulatedFunction tabulatedFunctions[MAX_TABULATED_FUNCTIONS];
-    std::vector<int3> posCellOffsets;
-    int iterations;
-    float epsfac;
-    float solventDielectric;
-    float soluteDielectric;
-    int grid;
-    bool bCalculateCM;
-    bool bRemoveCM;
-    bool bRecalculateBornRadii;
-    bool bOutputBufferPerWarp;
-    bool bIncludeGBSA;
-    bool bIncludeGBVI;
-    bool tabulatedFunctionsChanged;
-    unsigned long seed;
-    SM_VERSION sm_version;
-    compactionPlan compactPlan;
-    cufftHandle fftplan;
-    CUDAStream<float4>* psPosq4;
-    CUDAStream<float4>* psPosqP4;
-    CUDAStream<float4>* psOldPosq4;
-    CUDAStream<float4>* psVelm4;
-    CUDAStream<float4>* psForce4;
-    CUDAStream<float>*  psEnergy;           // Energy output buffer
-    CUDAStream<float2>* psSigEps2; 
-    CUDAStream<float4>* psCustomParams;     // Atom parameters for custom nonbonded force
-    CUDAStream<int4>* psCustomBondID;             // Atom indices for custom bonds
-    CUDAStream<float4>* psCustomBondParams;       // Parameters for custom bonds
-    CUDAStream<int4>* psCustomAngleID1;           // Atom indices for custom angles
-    CUDAStream<int2>* psCustomAngleID2;           // Atom indices for custom angles
-    CUDAStream<float4>* psCustomAngleParams;      // Parameters for custom angles
-    CUDAStream<int4>* psCustomTorsionID1;           // Atom indices for custom torsions
-    CUDAStream<int4>* psCustomTorsionID2;           // Atom indices for custom torsions
-    CUDAStream<float4>* psCustomTorsionParams;      // Parameters for custom torsions
-    CUDAStream<int>* psCustomExternalID;          // Atom indices for custom external force
-    CUDAStream<float4>* psCustomExternalParams;   // Parameters for custom external force
-    CUDAStream<float4>* psTabulatedFunctionParams; // The min, max, and spacing for each tabulated function
-    CUDAStream<float2>* psEwaldCosSinSum;
-    CUDAStream<float>* psTabulatedErfc;     // Tabulated values for erfc()
-    CUDAStream<cufftComplex>* psPmeGrid;    // Grid points for particle mesh Ewald
-    CUDAStream<float>* psPmeBsplineModuli[3];
-    CUDAStream<float4>* psPmeBsplineTheta;
-    CUDAStream<float4>* psPmeBsplineDtheta;
-    CUDAStream<int>* psPmeAtomRange;           // The range of sorted atoms at each grid point
-    CUDAStream<int2>* psPmeAtomGridIndex;      // The grid point each atom is at
-    CUDAStream<float2>* psObcData;
-    CUDAStream<float4>* psGBVIData;
-    CUDAStream<float>* psObcChain;
-    CUDAStream<float>* psBornForce;
-    CUDAStream<float>* psBornRadii;
-    CUDAStream<float>* psBornSum;
-    CUDAStream<int4>* psBondID;
-    CUDAStream<float2>* psBondParameter;
-    CUDAStream<int4>* psBondAngleID1;
-    CUDAStream<int2>* psBondAngleID2;
-    CUDAStream<float2>* psBondAngleParameter;
-    CUDAStream<int4>* psDihedralID1;
-    CUDAStream<int4>* psDihedralID2;
-    CUDAStream<float4>* psDihedralParameter;
-    CUDAStream<int4>* psRbDihedralID1;
-    CUDAStream<int4>* psRbDihedralID2;
-    CUDAStream<float4>* psRbDihedralParameter1;
-    CUDAStream<float2>* psRbDihedralParameter2;
-    CUDAStream<int4>* psLJ14ID;
-    CUDAStream<float4>* psLJ14Parameter;
-    CUDAStream<int4>* psShakeID;
-    CUDAStream<float4>* psShakeParameter;
-    CUDAStream<int4>* psSettleID;
-    CUDAStream<float2>* psSettleParameter;
-    CUDAStream<unsigned int>* psExclusion;
-    CUDAStream<unsigned int>* psExclusionIndex;
-    CUDAStream<unsigned int>* psWorkUnit;
-    CUDAStream<unsigned int>* psInteractingWorkUnit;
-    CUDAStream<unsigned int>* psInteractionFlag;
-    CUDAStream<size_t>* psInteractionCount;
-    CUDAStream<float2>* psStepSize;         // The size of the previous and current time steps
-    CUDAStream<float>* psLangevinParameters;// Parameters used for Langevin integration
-    CUDAStream<float4>* psRandom4;          // Pointer to sets of 4 random numbers for MD integration
-    CUDAStream<float2>* psRandom2;          // Pointer to sets of 2 random numbers for MD integration
-    CUDAStream<uint4>* psRandomSeed;        // Pointer to each random seed
-    CUDAStream<int>* psRandomPosition;      // Pointer to random number positions
-    CUDAStream<float4>* psLinearMomentum;   // Pointer to total linear momentum per CTA
-    CUDAStream<int>* psAtomIndex;           // The original index of each atom
-    CUDAStream<float4>* psGridBoundingBox;  // The size of each grid cell
-    CUDAStream<float4>* psGridCenter;       // The center and radius for each grid cell
-    CUDAStream<int2>* psCcmaAtoms;          // The atoms connected by each CCMA constraint
-    CUDAStream<float4>* psCcmaDistance;     // The displacement vector (x, y, z) and constraint distance (w) for each CCMA constraint
-    CUDAStream<int>* psCcmaAtomConstraints; // The indices of constraints involving each atom
-    CUDAStream<int>* psCcmaNumAtomConstraints; // The number of constraints involving each atom
-    CUDAStream<float>* psCcmaDelta1;        // Workspace for CCMA
-    CUDAStream<float>* psCcmaDelta2;        // Workspace for CCMA
-    int* ccmaConvergedHostMarker;           // Host memory used to communicate that CCMA has converged
-    cudaEvent_t ccmaEvent;                  // Used to optimize communication during CCMA
-    CUDAStream<float>* psCcmaReducedMass;   // The reduced mass for each CCMA constraint
-    CUDAStream<float>* psRigidClusterMatrix;// The inverse constraint matrix for each rigid cluster
-    CUDAStream<unsigned int>* psRigidClusterConstraintIndex; // The index of each cluster in the stream containing cluster constraints.
-    CUDAStream<unsigned int>* psRigidClusterMatrixIndex; // The index of each cluster in the stream containing cluster matrices.
-    CUDAStream<unsigned int>* psConstraintMatrixColumn; // The column of each element in the constraint matrix.
-    CUDAStream<float>* psConstraintMatrixValue; // The value of each element in the constraint matrix.
-};
-typedef struct _gpuContext *gpuContext;
-// Function prototypes
-extern "C"
-bool gpuIsAvailable();
-extern "C"
-void gpuSetBondParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<float>& length, const std::vector<float>& k);
-extern "C"
-void gpuSetBondAngleParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3,
-        const std::vector<float>& angle, const std::vector<float>& k);
-extern "C"
-void gpuSetDihedralParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3, const std::vector<int>& atom4,
-        const std::vector<float>& k, const std::vector<float>& phase, const std::vector<int>& periodicity);
-extern "C"
-void gpuSetRbDihedralParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<int>& atom3, const std::vector<int>& atom4,
-        const std::vector<float>& c0, const std::vector<float>& c1, const std::vector<float>& c2, const std::vector<float>& c3, const std::vector<float>& c4, const std::vector<float>& c5);
-extern "C"
-void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const std::vector<int>& atom1, const std::vector<int>& atom2,
-        const std::vector<float>& c6, const std::vector<float>& c12, const std::vector<float>& q1, const std::vector<float>& q2);
-extern "C"
-void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const std::vector<int>& atom, const std::vector<float>& c6, const std::vector<float>& c12, const std::vector<float>& q,
-        const std::vector<char>& symbol, const std::vector<std::vector<int> >& exclusions, CudaNonbondedMethod method);
-extern "C"
-void gpuSetNonbondedCutoff(gpuContext gpu, float cutoffDistance, float solventDielectric);
-extern "C"
-void gpuSetTabulatedFunction(gpuContext gpu, int index, const std::string& name, const std::vector<double>& values, double min, double max);
-extern "C"
-void gpuSetCustomBondParameters(gpuContext gpu, const std::vector<int>& bondAtom1, const std::vector<int>& bondAtom2, const std::vector<std::vector<double> >& bondParams,
-            const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
-extern "C"
-void gpuSetCustomAngleParameters(gpuContext gpu, const std::vector<int>& angleAtom1, const std::vector<int>& angleAtom2, const std::vector<int>& angleAtom3, const std::vector<std::vector<double> >& angleParams,
-            const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
-extern "C"
-void gpuSetCustomTorsionParameters(gpuContext gpu, const std::vector<int>& torsionAtom1, const std::vector<int>& torsionAtom2, const std::vector<int>& torsionAtom3, const std::vector<int>& torsionAtom4, const std::vector<std::vector<double> >& torsionParams,
-            const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
-extern "C"
-void gpuSetCustomExternalParameters(gpuContext gpu, const std::vector<int>& atomIndex, const std::vector<std::vector<double> >& atomParams,
-            const std::string& energyExp, const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
-extern "C"
-void gpuSetCustomNonbondedParameters(gpuContext gpu, const std::vector<std::vector<double> >& parameters, const std::vector<std::vector<int> >& exclusions,
-            CudaNonbondedMethod method, float cutoffDistance, const std::string& energyExp,
-            const std::vector<std::string>& paramNames, const std::vector<std::string>& globalParamNames);
-extern "C"
-void gpuSetEwaldParameters(gpuContext gpu, float alpha, int kmaxx, int kmaxy, int kmaxz);
-extern "C"
-void gpuSetPMEParameters(gpuContext gpu, float alpha, int gridSizeX, int gridSizeY, int gridSizeZ);
-extern "C"
-void OPENMMCUDA_EXPORT gpuSetPeriodicBoxSize(gpuContext gpu, float xsize, float ysize, float zsize);
-extern "C"
-void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<float>& radius, const std::vector<float>& scale, const std::vector<float>& charge);
-extern "C" 
-void gpuSetGBVIParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<int>& atom, const std::vector<float>& radius,
-                          const std::vector<float>& gammas, const std::vector<float>& scaledRadii);
-extern "C"
-void gpuSetConstraintParameters(gpuContext gpu, const std::vector<int>& atom1, const std::vector<int>& atom2, const std::vector<float>& distance,
-        const std::vector<float>& invMass1, const std::vector<float>& invMass2, float constraintTolerance);
-extern "C"
-int gpuAllocateInitialBuffers(gpuContext gpu);
-extern "C"
-void gpuSetPositions(gpuContext gpu, const std::vector<float>& x, const std::vector<float>& y, const std::vector<float>& z);
-extern "C"
-void gpuSetVelocities(gpuContext gpu, const std::vector<float>& x, const std::vector<float>& y, const std::vector<float>& z);
-extern "C"
-void gpuSetMass(gpuContext gpu, const std::vector<float>& mass);
-extern "C"
-void OPENMMCUDA_EXPORT gpuInitializeRandoms(gpuContext gpu);
-extern "C"
-OPENMMCUDA_EXPORT void* gpuInit(int numAtoms, unsigned int device = 0, bool useBlockingSync = false);
-extern "C"
-void gpuSetLangevinIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature, float errorTol);
-extern "C"
-void gpuSetVerletIntegrationParameters(gpuContext gpu, float deltaT, float errorTol);
-extern "C"
-void gpuSetBrownianIntegrationParameters(gpuContext gpu, float tau, float deltaT, float temperature);
-extern "C"
-void gpuSetAndersenThermostatParameters(gpuContext gpu, float temperature, float collisionFrequency);
-extern "C"
-void gpuShutDown(gpuContext gpu);
-extern "C"
-int gpuBuildOutputBuffers(gpuContext gpu);
-extern "C"
-int gpuBuildThreadBlockWorkList(gpuContext gpu);
-extern "C"
-void gpuBuildExclusionList(gpuContext gpu);
-extern "C"
-int OPENMMCUDA_EXPORT gpuSetConstants(gpuContext gpu);
-extern "C"
-void gpuReorderAtoms(gpuContext gpu);
-extern "C"
-void setExclusions(gpuContext gpu, const std::vector<std::vector<int> >& exclusions);
-#endif //__GPUTYPES_H__

--- a/platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
+++ b/platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
-/* -------------------------------------------------------------------------- *
+Vim: Warning: Output is not to a terminal
- *                                   OpenMM                                   *
+[?1049h[?1h=[1;59r[?12;25h[?12l[?25h[27m[m[H[2J[?25l[59;1H"svn-commit.tmp" 15L, 601C[1;1H[33m  1 
- * -------------------------------------------------------------------------- *
+  2 [m[32m--This line, and those below, will be ignored--[m
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+[33m  3 
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+  4 [m[1m[35mM    plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp[m
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+[33m  5 [m[1m[35mM    plugins/freeEnergy/platforms/reference/src/gbsa/CpuGBVISoftcore.cpp[m
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+[33m  6 [m[1m[35mM    openmmapi/include/openmm/GBVIForce.h[m
- *                                                                            *
+[33m  7 [m[1m[35mM    openmmapi/src/GBVIForce.cpp[m
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+[33m  8 [m[1m[35mM    olla/src/Platform.cpp[m
- * Authors: Scott Le Grand, Peter Eastman                                     *
+[33m  9 [m[1m[35mM    platforms/opencl/src/OpenCLContext.h[m
- * Contributors:                                                              *
+[33m 10 [m[1m[35mM    platforms/cuda/src/CudaKernels.cpp[m
- *                                                                            *
+[33m 11 [m[1m[35mM    platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu[m
- * Permission is hereby granted, free of charge, to any person obtaining a    *
+[33m 12 [m[1m[35mM    platforms/cuda/src/kernels/gputypes.h[m
- * copy of this software and associated documentation files (the "Software"), *
+[33m 13 [m[1m[35mM    platforms/cuda/src/kernels/cudatypes.h[m
- * to deal in the Software without restriction, including without limitation  *
+[33m 14 [m[1m[35mM    platforms/cuda/src/kernels/kForces.cu[m
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+[33m 15 [m[1m[35mM    platforms/cuda/src/kernels/gpu.cpp[m
- * and/or sell copies of the Software, and to permit persons to whom the      *
+[1m[34m~                                                                                                                                                                                                                                         [17;1H~                                                                                                                                                                                                                                         [18;1H~                                                                                                                                                                                                                                         [19;1H~                                                                                                                                                                                                                                         [20;1H~                                                                                                                                                                                                                                         [21;1H~                                                                                                                                                                                                                                         [22;1H~                                                                                                                                                                                                                                         [23;1H~                                                                                                                                                                                                                                         [24;1H~                                                                       
                                                                                                                                                                  [25;1H~                                                                                                                                                                                                                                         [26;1H~                                                                                                                                                                                                                                         [27;1H~                                                                                                                                                                                                                                         [28;1H~                                                                                                                                                                                                                                         [29;1H~                                                                                                                                                                                                                                         [30;1H~                                                                                                                                                                                                                                         [31;1H~                                                                                                                                                                                                                                         [32;1H~                                                                                                                                                       
                                                                                  [33;1H~                                                                                                                                                                                                                                         [34;1H~                                                                                                                                                                                                                                         [35;1H~                                                                                                                                                                                                                                         [36;1H~                                                                                                                                                                                                                                         [37;1H~                                                                                                                                                                                                                                         [38;1H~                                                                                                                                                                                                                                         [39;1H~                                                                                                                                                                                                                                         [40;1H~                                                                                                                                                                                                                                       
  [41;1H~                                                                                                                                                                                                                                         [42;1H~                                                                                                                                                                                                                                         [43;1H~                                                                                                                                                                                                                                         [44;1H~                                                                                                                                                                                                                                         [45;1H~                                                                                                                                                                                                                                         [46;1H~                                                                                                                                                                                                                                         [47;1H~                                                                                                                                                                                                                                         [48;1H~                                                                                                                                                                                                                                         [49;1H~                                                                       
                                                                                                                                                                  [50;1H~                                                                                                                                                                                                                                         [51;1H~                                                                                                                                                                                                                                         [52;1H~                                                                                                                                                                                                                                         [53;1H~                                                                                                                                                                                                                                         [54;1H~                                                                                                                                                                                                                                         [55;1H~                                                                                                                                                                                                                                         [56;1H~                                                                                                                                                                                                                                         [57;1H~                                                                                                                                                       
                                                                                  [58;1H~                                                                                                                                                                                                                                         [m[59;217H1,0-1[9CAll[1;5H[?12l[?25h[?25l[59;1H[K[59;1H:[?12l[?25hq![?25l[59;1H[K[59;1H[?1l>[?12l[?25h[?1049l
- * Software is furnished to do so, subject to the following conditions:       *
+Log message unchanged or not specified
- *                                                                            *
+a)bort, c)ontinue, e)dit
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-#include <stdio.h>
-#include <cuda.h>
-#include <vector_functions.h>
-#include <cstdlib>
-#include <string>
-#include <iostream>
-#include <fstream>
-using namespace std;
-#include "gputypes.h"
-#define UNROLLXX 0
-#define UNROLLXY 0
-struct Atom {
-    float x;
-    float y;
-    float z;
-    float r;
-    float sr;
-    float sum;
-    float gamma;
-};
-static __constant__ cudaGmxSimulation cSim;
-void SetCalculateGBVIBornSumSim(gpuContext gpu)
-{
-    cudaError_t status;
-    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));     
-    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
-}
-void GetCalculateGBVIBornSumSim(gpuContext gpu)
-{
-    cudaError_t status;
-    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));     
-    RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
-}
-// Include versions of the kernels for N^2 calculations.
-#define METHOD_NAME(a, b) a##N2##b
-#include "kCalculateGBVIBornSum.h"
-#define USE_OUTPUT_BUFFER_PER_WARP
-#undef METHOD_NAME
-#define METHOD_NAME(a, b) a##N2ByWarp##b
-#include "kCalculateGBVIBornSum.h"
-// Include versions of the kernels with cutoffs.
-#undef METHOD_NAME
-#undef USE_OUTPUT_BUFFER_PER_WARP
-#define USE_CUTOFF
-#define METHOD_NAME(a, b) a##Cutoff##b
-#include "kCalculateGBVIBornSum.h"
-#define USE_OUTPUT_BUFFER_PER_WARP
-#undef METHOD_NAME
-#define METHOD_NAME(a, b) a##CutoffByWarp##b
-#include "kCalculateGBVIBornSum.h"
-// Include versions of the kernels with periodic boundary conditions.
-#undef METHOD_NAME
-#undef USE_OUTPUT_BUFFER_PER_WARP
-#define USE_PERIODIC
-#define METHOD_NAME(a, b) a##Periodic##b
-#include "kCalculateGBVIBornSum.h"
-#define USE_OUTPUT_BUFFER_PER_WARP
-#undef METHOD_NAME
-#define METHOD_NAME(a, b) a##PeriodicByWarp##b
-#include "kCalculateGBVIBornSum.h"
-__global__ void kReduceGBVIBornSum_kernel()
-{
-    unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
-    while (pos < cSim.atoms)
-    {
-        float sum = 0.0f;
-        float* pSt = cSim.pBornSum + pos;
-        float4 atom = cSim.pGBVIData[pos];
-        // Get summed Born data
-        for (int i = 0; i < cSim.nonbondOutputBuffers; i++)
-        {
-            sum += *pSt;
-       //     printf("%4d %4d A: %9.4f\n", pos, i, *pSt);
-            pSt += cSim.stride;
-        }
-        // Now calculate Born radius
-        float Rinv           = 1.0f/atom.x;
-        sum                  = Rinv*Rinv*Rinv - sum; 
-        cSim.pBornRadii[pos] = pow( sum, (-1.0f/3.0f) ); 
-        pos += gridDim.x * blockDim.x;
-    }   
-}
-void kReduceGBVIBornSum(gpuContext gpu)
-{
-    //printf("kReduceGBVIBornSum\n");
-#define GBVI_DEBUG 0
-#if ( GBVI_DEBUG == 1 )
-               gpu->psGBVIData->Download();
-               gpu->psBornSum->Download();
-               gpu->psPosq4->Download();
-                (void) fprintf( stderr, "\nkReduceGBVIBornSum: Post BornSum %s Born radii & params\n", 
-                               (gpu->bIncludeGBVI ? "GBVI" : "Obc") );
-                for( int ii = 0; ii < gpu->natoms; ii++ ){
-                   (void) fprintf( stderr, "%d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]\n",
-                                   ii, 
-                                   gpu->psBornSum->_pSysStream[0][ii],
-                                   gpu->psGBVIData->_pSysStream[0][ii].x,
-                                   gpu->psGBVIData->_pSysStream[0][ii].y,
-                                   gpu->psGBVIData->_pSysStream[0][ii].z,
-                                   gpu->psPosq4->_pSysStream[0][ii].x, gpu->psPosq4->_pSysStream[0][ii].y,
-                                   gpu->psPosq4->_pSysStream[0][ii].z, gpu->psPosq4->_pSysStream[0][ii].w
-                                 );  
-                }   
-#endif
-#undef GBVI_DEBUG
-    kReduceGBVIBornSum_kernel<<<gpu->sim.blocks, 384>>>();
-    gpu->bRecalculateBornRadii = false;
-    LAUNCHERROR("kReduceGBVIBornSum");
-}
-void kCalculateGBVIBornSum(gpuContext gpu)
-{
-    //printf("kCalculateGBVIBornSum\n");
-    //size_t numWithInteractions;
-    switch (gpu->sim.nonbondedMethod)
-    {
-        case NO_CUTOFF:
-#define GBVI 0
-#if GBVI == 1
-int maxPrint = 10;
-gpu->psWorkUnit->Download();
-fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u\n", gpu->bOutputBufferPerWarp,
-                 gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.workUnits, gpu->psWorkUnit->_pSysStream[0][0],
-        sizeof(Atom)*gpu->sim.nonbond_threads_per_block );
-               gpu->psGBVIData->Download();
-               gpu->psBornSum->Download();
-               gpu->psPosq4->Download();
-                (void) fprintf( stderr, "\nkCalculateGBVIBornSum: pre BornSum %s Born radii & params\n",
-                               (gpu->bIncludeGBVI ? "GBVI" : "Obc") );
-                for( int ii = 0; ii < gpu->natoms; ii++ ){
-                   (void) fprintf( stderr, "%d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]\n",
-                                   ii, 
-                                   gpu->psBornSum->_pSysStream[0][ii],
-                                   gpu->psGBVIData->_pSysStream[0][ii].x,
-                                   gpu->psGBVIData->_pSysStream[0][ii].y,
-                                   gpu->psGBVIData->_pSysStream[0][ii].z,
-                                   gpu->psPosq4->_pSysStream[0][ii].x, gpu->psPosq4->_pSysStream[0][ii].y,
-                                   gpu->psPosq4->_pSysStream[0][ii].z, gpu->psPosq4->_pSysStream[0][ii].w
-                                 );
-                   if( (ii == maxPrint) && ( ii < (gpu->natoms - maxPrint)) ){
-                      ii = gpu->natoms - maxPrint;
-                   }
-                }
-#endif
-#undef GBVI
-            if (gpu->bOutputBufferPerWarp){
-                kCalculateGBVIN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
-            } else {
-                kCalculateGBVIN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
-            }
-            break;
-        case CUTOFF:
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateGBVICutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-            else
-                kCalculateGBVICutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
-            break;
-        case PERIODIC:
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateGBVIPeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
-            else
-                kCalculateGBVIPeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
-            break;
-    }
-    LAUNCHERROR("kCalculateGBVIBornSum");
-}

--- a/platforms/cuda/src/kernels/kForces.cu
+++ b/platforms/cuda/src/kernels/kForces.cu
-/* -------------------------------------------------------------------------- *
+Vim: Warning: Output is not to a terminal
- *                                   OpenMM                                   *
+[?1049h[?1h=[1;59r[?12;25h[?12l[?25h[27m[m[H[2J[?25l[59;1H"svn-commit.tmp" 15L, 601C[1;1H[33m  1 
- * -------------------------------------------------------------------------- *
+  2 [m[32m--This line, and those below, will be ignored--[m
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+[33m  3 
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+  4 [m[1m[35mM    plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp[m
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+[33m  5 [m[1m[35mM    plugins/freeEnergy/platforms/reference/src/gbsa/CpuGBVISoftcore.cpp[m
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+[33m  6 [m[1m[35mM    openmmapi/include/openmm/GBVIForce.h[m
- *                                                                            *
+[33m  7 [m[1m[35mM    openmmapi/src/GBVIForce.cpp[m
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+[33m  8 [m[1m[35mM    olla/src/Platform.cpp[m
- * Authors: Scott Le Grand, Peter Eastman                                     *
+[33m  9 [m[1m[35mM    platforms/opencl/src/OpenCLContext.h[m
- * Contributors:                                                              *
+[33m 10 [m[1m[35mM    platforms/cuda/src/CudaKernels.cpp[m
- *                                                                            *
+[33m 11 [m[1m[35mM    platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu[m
- * This program is free software: you can redistribute it and/or modify       *
+[33m 12 [m[1m[35mM    platforms/cuda/src/kernels/gputypes.h[m
- * it under the terms of the GNU Lesser General Public License as published   *
+[33m 13 [m[1m[35mM    platforms/cuda/src/kernels/cudatypes.h[m
- * by the Free Software Foundation, either version 3 of the License, or       *
+[33m 14 [m[1m[35mM    platforms/cuda/src/kernels/kForces.cu[m
- * (at your option) any later version.                                        *
+[33m 15 [m[1m[35mM    platforms/cuda/src/kernels/gpu.cpp[m
- *                                                                            *
+[1m[34m~                                                                                                                                                                                                                                         [17;1H~                                                                                                                                                                                                                                         [18;1H~                                                                                                                                                                                                                                         [19;1H~                                                                                                                                                                                                                                         [20;1H~                                                                                                                                                                                                                                         [21;1H~                                                                                                                                                                                                                                         [22;1H~                                                                                                                                                                                                                                         [23;1H~                                                                                                                                                                                                                                         [24;1H~                                                                       
                                                                                                                                                                  [25;1H~                                                                                                                                                                                                                                         [26;1H~                                                                                                                                                                                                                                         [27;1H~                                                                                                                                                                                                                                         [28;1H~                                                                                                                                                                                                                                         [29;1H~                                                                                                                                                                                                                                         [30;1H~                                                                                                                                                                                                                                         [31;1H~                                                                                                                                                                                                                                         [32;1H~                                                                                                                                                       
                                                                                  [33;1H~                                                                                                                                                                                                                                         [34;1H~                                                                                                                                                                                                                                         [35;1H~                                                                                                                                                                                                                                         [36;1H~                                                                                                                                                                                                                                         [37;1H~                                                                                                                                                                                                                                         [38;1H~                                                                                                                                                                                                                                         [39;1H~                                                                                                                                                                                                                                         [40;1H~                                                                                                                                                                                                                                       
  [41;1H~                                                                                                                                                                                                                                         [42;1H~                                                                                                                                                                                                                                         [43;1H~                                                                                                                                                                                                                                         [44;1H~                                                                                                                                                                                                                                         [45;1H~                                                                                                                                                                                                                                         [46;1H~                                                                                                                                                                                                                                         [47;1H~                                                                                                                                                                                                                                         [48;1H~                                                                                                                                                                                                                                         [49;1H~                                                                       
                                                                                                                                                                  [50;1H~                                                                                                                                                                                                                                         [51;1H~                                                                                                                                                                                                                                         [52;1H~                                                                                                                                                                                                                                         [53;1H~                                                                                                                                                                                                                                         [54;1H~                                                                                                                                                                                                                                         [55;1H~                                                                                                                                                                                                                                         [56;1H~                                                                                                                                                                                                                                         [57;1H~                                                                                                                                                       
                                                                                  [58;1H~                                                                                                                                                                                                                                         [m[59;217H1,0-1[9CAll[1;5H[?12l[?25h[?25l[59;1H[K[59;1H:[?12l[?25hq![?25l[59;1H[K[59;1H[?1l>[?12l[?25h[?1049l
- * This program is distributed in the hope that it will be useful,            *
+Log message unchanged or not specified
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+a)bort, c)ontinue, e)dit
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include <stdio.h>
-#include <cuda.h>
-#include <vector_functions.h>
-#include <cstdlib>
-#include <string>
-#include <iostream>
-#include <fstream>
-using namespace std;
-#include "gputypes.h"
-#define FABS(a) ((a) > 0.0f ? (a) : -(a))
-static __constant__ cudaGmxSimulation cSim;
-void OPENMMCUDA_EXPORT SetForcesSim(gpuContext gpu)
-{
-    cudaError_t status;
-    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));     
-    RTERROR(status, "cudaMemcpyToSymbol: SetForcesSim copy to cSim failed");
-}
-void GetForcesSim(gpuContext gpu)
-{
-    cudaError_t status;
-    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));     
-    RTERROR(status, "cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed");
-}
-__global__ 
-__launch_bounds__(384, 1)
-void kClearForces_kernel()
-{
-    unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
-    while (pos < cSim.stride * cSim.outputBuffers)
-    {
-        cSim.pForce4[pos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-        pos += gridDim.x * blockDim.x;
-    }
-}
-void OPENMMCUDA_EXPORT kClearForces(gpuContext gpu)
-{
-//    printf("kClearForces\n");
-    kClearForces_kernel<<<gpu->sim.blocks, 384>>>();
-    LAUNCHERROR("kClearForces");
-}
-__global__ 
-__launch_bounds__(384, 1)
-void kClearBornSumAndForces_kernel()
-{
-    unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
-    while (pos < cSim.stride * cSim.nonbondOutputBuffers)
-    {
-        cSim.pBornSum[pos] = 0.0f;
-        cSim.pBornForce[pos] = 0.0f;
-        cSim.pForce4[pos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-        pos += gridDim.x * blockDim.x;
-    }
-    while (pos < cSim.stride * cSim.outputBuffers)
-    {
-        cSim.pForce4[pos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-        pos += gridDim.x * blockDim.x;
-    }
-}
-void kClearBornSumAndForces(gpuContext gpu)
-{
-  //  printf("kClearBornSumAndForces\n");
-    kClearBornSumAndForces_kernel<<<gpu->sim.blocks, 384>>>();
-    LAUNCHERROR("kClearBornSumAndForces");
-}
-__global__ 
-__launch_bounds__(384, 1)
-void kClearEnergy_kernel()
-{
-    unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
-    while (pos < cSim.energyOutputBuffers)
-    {
-        ((float*)cSim.pEnergy)[pos] = 0.0f;
-        pos += gridDim.x * blockDim.x;
-    }
-}
-void kClearEnergy(gpuContext gpu)
-{
-  //  printf("kClearEnergy\n");
-    kClearEnergy_kernel<<<gpu->sim.blocks, 384>>>();
-    LAUNCHERROR("kClearEnergy");
-}
-__global__ 
-#if (__CUDA_ARCH__ >= 200)
-__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
-#elif (__CUDA_ARCH__ >= 120)
-__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
-#else
-__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
-#endif
-void kReduceBornSumAndForces_kernel()
-{
-    unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
-    // Reduce forces
-    while (pos < cSim.stride4)
-    {
-        float totalForce = 0.0f;
-        float* pFt = (float*)cSim.pForce4 + pos;
-        int i = cSim.outputBuffers;
-        while (i >= 4)
-        {
-            float f1    = *pFt;
-            pFt        += cSim.stride4;
-            float f2    = *pFt;
-            pFt        += cSim.stride4;
-            float f3    = *pFt;
-            pFt        += cSim.stride4;
-            float f4    = *pFt;
-            pFt        += cSim.stride4;
-            totalForce += f1 + f2 + f3 + f4;
-            i -= 4;
-        }
-        if (i >= 2)
-        {
-            float f1    = *pFt;
-            pFt        += cSim.stride4;
-            float f2    = *pFt;
-            pFt        += cSim.stride4;
-            totalForce += f1 + f2;
-            i -= 2;
-        }
-        if (i > 0)
-        {
-            totalForce += *pFt;
-        }
-        pFt = (float*)cSim.pForce4 + pos;
-        *pFt = totalForce;
-        pos += gridDim.x * blockDim.x;
-    }   
-    // Reduce Born Sum
-    while (pos - cSim.stride4 < cSim.atoms)
-    {
-        float sum = 0.0f;
-        float* pSt = cSim.pBornSum + pos - cSim.stride4;
-        float2 atom = cSim.pObcData[pos - cSim.stride4];
-        // Get summed Born data
-        int i = cSim.nonbondOutputBuffers;
-        while (i >= 4)
-        {
-            float f1    = *pSt;
-            pSt        += cSim.stride;
-            float f2    = *pSt;
-            pSt        += cSim.stride;
-            float f3    = *pSt;
-            pSt        += cSim.stride;
-            float f4    = *pSt;
-            pSt        += cSim.stride;
-            sum += f1 + f2 + f3 + f4;
-            i -= 4;
-        }
-        if (i >= 2)
-        {
-            float f1    = *pSt;
-            pSt        += cSim.stride;
-            float f2    = *pSt;
-            pSt        += cSim.stride;
-            sum += f1 + f2;
-            i -= 2;
-        }
-        if (i > 0)
-        {
-            sum += *pSt;
-        }
-        // Now calculate Born radius and OBC term.
-        cSim.pBornSum[pos - cSim.stride4] = sum; 
-        sum                    *= 0.5f * atom.x;
-        float sum2              = sum * sum;
-        float sum3              = sum * sum2;
-        float tanhSum           = tanh(cSim.alphaOBC * sum - cSim.betaOBC * sum2 + cSim.gammaOBC * sum3);
-        float nonOffsetRadii    = atom.x + cSim.dielectricOffset;
-        float bornRadius        = 1.0f / (1.0f / atom.x - tanhSum / nonOffsetRadii); 
-        float obcChain          = atom.x * (cSim.alphaOBC - 2.0f * cSim.betaOBC * sum + 3.0f * cSim.gammaOBC * sum2);
-        obcChain                = (1.0f - tanhSum * tanhSum) * obcChain / nonOffsetRadii;              
-        cSim.pBornRadii[pos - cSim.stride4] = bornRadius;
-        cSim.pObcChain[pos - cSim.stride4]  = obcChain;
-        pos += gridDim.x * blockDim.x;
-    }
-}
-void kReduceBornSumAndForces(gpuContext gpu)
-{
-    //printf("kReduceBornSumAndForces\n");
-    kReduceBornSumAndForces_kernel<<<gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block>>>();
-    LAUNCHERROR("kReduceBornSumAndForces");
-}
-__global__ 
-#if (__CUDA_ARCH__ >= 200)
-__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
-#elif (__CUDA_ARCH__ >= 120)
-__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
-#else
-__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
-#endif
-void kReduceForces_kernel()
-{
-    unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
-    // Reduce forces
-    while (pos < cSim.stride4)
-    {
-        float totalForce = 0.0f;
-        float* pFt = (float*)cSim.pForce4 + pos;
-        int i = cSim.outputBuffers;
-        while (i >= 4)
-        {
-            float f1    = *pFt;
-            pFt        += cSim.stride4;
-            float f2    = *pFt;
-            pFt        += cSim.stride4;
-            float f3    = *pFt;
-            pFt        += cSim.stride4;
-            float f4    = *pFt;
-            pFt        += cSim.stride4;
-            totalForce += f1 + f2 + f3 + f4;
-            i -= 4;
-        }
-        if (i >= 2)
-        {
-            float f1    = *pFt;
-            pFt        += cSim.stride4;
-            float f2    = *pFt;
-            pFt        += cSim.stride4;
-            totalForce += f1 + f2;
-            i -= 2;
-        }
-        if (i > 0)
-        {
-            totalForce += *pFt;
-        }
-        pFt = (float*)cSim.pForce4 + pos;
-        *pFt = totalForce;
-        pos += gridDim.x * blockDim.x;
-    }   
-}
-void OPENMMCUDA_EXPORT kReduceForces(gpuContext gpu)
-{
- //   printf("kReduceForces\n");
-    kReduceForces_kernel<<<gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block>>>();
-    LAUNCHERROR("kReduceForces");
-}
-double kReduceEnergy(gpuContext gpu)
-{
-    //printf("kReduceEnergy\n");
-    gpu->psEnergy->Download();
-    double sum = 0.0;
-    for (int i = 0; i < gpu->sim.energyOutputBuffers; i++){
-        sum += (*gpu->psEnergy)[i];
-    }
-    return sum;
-}
-__global__ 
-#if (__CUDA_ARCH__ >= 200)
-__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
-#elif (__CUDA_ARCH__ >= 120)
-__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
-#else
-__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
-#endif
-void kReduceObcGbsaBornForces_kernel()
-{
-    unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
-    float energy = 0.0f;
-    while (pos < cSim.atoms)
-    {
-        float bornRadius = cSim.pBornRadii[pos];
-        float obcChain   = cSim.pObcChain[pos];
-        float2 obcData   = cSim.pObcData[pos];
-        float totalForce = 0.0f;
-        float* pFt = cSim.pBornForce + pos;
-        int i = cSim.nonbondOutputBuffers;
-        while (i >= 4)
-        {
-            float f1    = *pFt;
-            pFt        += cSim.stride;
-            float f2    = *pFt;
-            pFt        += cSim.stride;
-            float f3    = *pFt;
-            pFt        += cSim.stride;
-            float f4    = *pFt;
-            pFt        += cSim.stride;
-            totalForce += f1 + f2 + f3 + f4;
-            i -= 4;
-        }
-        if (i >= 2)
-        {
-            float f1    = *pFt;
-            pFt        += cSim.stride;
-            float f2    = *pFt;
-            pFt        += cSim.stride;
-            totalForce += f1 + f2;
-            i -= 2;
-        }
-        if (i > 0)
-        {
-            totalForce += *pFt;
-        }
-        float r            = (obcData.x + cSim.dielectricOffset + cSim.probeRadius);
-        float ratio6       = pow((obcData.x + cSim.dielectricOffset) / bornRadius, 6.0f);
-        float saTerm       = cSim.surfaceAreaFactor * r * r * ratio6;
-        totalForce        += saTerm / bornRadius;
-        totalForce        *= bornRadius * bornRadius * obcChain;
-        energy            += saTerm;
-        pFt                = cSim.pBornForce + pos;
-        *pFt               = totalForce;
-        pos               += gridDim.x * blockDim.x;
-    }
-    // correct for surface area factor of -6
-    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += energy / -6.0f;
-}
-__global__ 
-#if (__CUDA_ARCH__ >= 200)
-__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
-#elif (__CUDA_ARCH__ >= 120)
-__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
-#else
-__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
-#endif
-void kReduceGBVIBornForces_kernel()
-{
-    unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
-    float energy = 0.0f;
-    while (pos < cSim.atoms)
-    {
-        float bornRadius  = cSim.pBornRadii[pos];
-        float4 gbviData   = cSim.pGBVIData[pos];
-        float totalForce  = 0.0f;
-        float* pFt        = cSim.pBornForce + pos;
-        int i = cSim.nonbondOutputBuffers;
-        while (i >= 4)
-        {
-            float f1    = *pFt;
-            pFt        += cSim.stride;
-            float f2    = *pFt;
-            pFt        += cSim.stride;
-            float f3    = *pFt;
-            pFt        += cSim.stride;
-            float f4    = *pFt;
-            pFt        += cSim.stride;
-            totalForce += f1 + f2 + f3 + f4;
-            i -= 4;
-        }
-        if (i >= 2)
-        {
-            float f1    = *pFt;
-            pFt        += cSim.stride;
-            float f2    = *pFt;
-            pFt        += cSim.stride;
-            totalForce += f1 + f2;
-            i -= 2;
-        }
-        if (i > 0)
-        {
-            totalForce += *pFt;
-        }
-        float ratio         = (gbviData.x/bornRadius);
-        float ratio3        = ratio*ratio*ratio;
-        energy             -= gbviData.z*ratio3;
-        totalForce         += (3.0f*gbviData.z*ratio3)/bornRadius; // 'cavity' term
-        float br2           = bornRadius*bornRadius;
-        totalForce         *= (1.0f/3.0f)*br2*br2;
-        pFt = cSim.pBornForce + pos;
-        *pFt = totalForce;
-        pos += gridDim.x * blockDim.x;
-    }
-    cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += energy;
-}
-void kReduceObcGbsaBornForces(gpuContext gpu)
-{
-    //printf("kReduceObcGbsaBornForces\n");
-    if( gpu->bIncludeGBSA ){
-       kReduceObcGbsaBornForces_kernel<<<gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block>>>();
-       LAUNCHERROR("kReduceObcGbsaBornForces");
-    } else if( gpu->bIncludeGBVI ){
-       kReduceGBVIBornForces_kernel<<<gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block>>>();
-       LAUNCHERROR("kReduceGBVIBornForces");
-    }   
-}