Merge pull request #2112 from peastman/cleanup

Code cleanup

Merge pull request #2112 from peastman/cleanup
Code cleanup
8f8aa247 · peastman · GitHub · 0d13f9cd · c0e862f6 · 8f8aa247
Unverified Commit 8f8aa247 authored Jul 06, 2018 by peastman Committed by GitHub Jul 06, 2018
20 changed files
--- a/libraries/lepton/src/CompiledExpression.cpp
+++ b/libraries/lepton/src/CompiledExpression.cpp
@@ -188,8 +188,8 @@ double CompiledExpression::evaluate() const {

 #ifdef LEPTON_USE_JIT
 static double evaluateOperation(Operation* op, double* args) {
-    map<string, double>* dummyVariables = NULL;
-    return op->evaluate(args, *dummyVariables);
+    static map<string, double> dummyVariables;
+    return op->evaluate(args, dummyVariables);
 }

 void CompiledExpression::generateJitCode() {

--- a/platforms/cpu/include/CpuBondForce.h
+++ b/platforms/cpu/include/CpuBondForce.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2014-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2014-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -50,22 +50,22 @@ public:
    /**
     * Analyze the set of bonds and decide which to compute with each thread.
     */
-    void initialize(int numAtoms, int numBonds, int numAtomsPerBond, int** bondAtoms, ThreadPool& threads);
+    void initialize(int numAtoms, int numBonds, int numAtomsPerBond, std::vector<std::vector<int> >& bondAtoms, ThreadPool& threads);
    /**
     * Compute the forces from all bonds.
     */
-    void calculateForce(std::vector<OpenMM::Vec3>& atomCoordinates, double** parameters, std::vector<OpenMM::Vec3>& forces, 
+    void calculateForce(std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<std::vector<double> >& parameters, std::vector<OpenMM::Vec3>& forces, 
            double* totalEnergy, ReferenceBondIxn& referenceBondIxn);
    /**
     * This routine contains the code executed by each thread.
     */
-    void threadComputeForce(ThreadPool& threads, int threadIndex, std::vector<OpenMM::Vec3>& atomCoordinates, double** parameters,
+    void threadComputeForce(ThreadPool& threads, int threadIndex, std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<std::vector<double> >& parameters,
            std::vector<OpenMM::Vec3>& forces, double* totalEnergy, ReferenceBondIxn& referenceBondIxn);
 private:
    bool canAssignBond(int bond, int thread, std::vector<int>& atomThread);
    void assignBond(int bond, int thread, std::vector<int>& atomThread, std::vector<int>& bondThread, std::vector<std::set<int> >& atomBonds, std::list<int>& candidateBonds);
    int numBonds, numAtomsPerBond;
-    int** bondAtoms;
+    std::vector<int>* bondAtoms;
    ThreadPool* threads;
    std::vector<std::vector<int> > threadBonds;
    std::vector<int> extraBonds;

--- a/platforms/cpu/include/CpuCustomGBForce.h
+++ b/platforms/cpu/include/CpuCustomGBForce.h

-/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2009-2018 Stanford University and Simbios.
 * Contributors: Peter Eastman
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -59,7 +59,7 @@ private:
    // The following variables are used to make information accessible to the individual threads.
    int numberOfAtoms;
    float* posq;
-    double** atomParameters;
+    std::vector<double>* atomParameters;
    const std::map<std::string, double>* globalParameters;
    std::vector<AlignedArray<float> >* threadForce;
    bool includeForce, includeEnergy;
@@ -81,7 +81,7 @@ private:
     * @param useExclusions    specifies whether to use exclusions
     */

-    void calculateParticlePairValue(int index, ThreadData& data, int numAtoms, float* posq, double** atomParameters,
+    void calculateParticlePairValue(int index, ThreadData& data, int numAtoms, float* posq, std::vector<double>* atomParameters,
                                    bool useExclusions, const fvec4& boxSize, const fvec4& invBoxSize);

    /**
@@ -95,7 +95,7 @@ private:
     * @param atomParameters   atomParameters[atomIndex][paramterIndex]
     */

-    void calculateOnePairValue(int index, int atom1, int atom2, ThreadData& data, float* posq, double** atomParameters,
+    void calculateOnePairValue(int index, int atom1, int atom2, ThreadData& data, float* posq, std::vector<double>* atomParameters,
                               std::vector<float>& valueArray, const fvec4& boxSize, const fvec4& invBoxSize);

    /**
@@ -110,7 +110,7 @@ private:
     * @param totalEnergy      the energy contribution is added to this
     */

-    void calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq, double** atomParameters, float* forces, double& totalEnergy);
+    void calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq, std::vector<double>* atomParameters, float* forces, double& totalEnergy);

    /**
     * Calculate an energy term that is based on particle pairs
@@ -125,7 +125,7 @@ private:
     * @param totalEnergy      the energy contribution is added to this
     */

-    void calculateParticlePairEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq, double** atomParameters,
+    void calculateParticlePairEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq, std::vector<double>* atomParameters,
                                    bool useExclusions, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);

    /**
@@ -141,7 +141,7 @@ private:
     * @param totalEnergy      the energy contribution is added to this
     */

-    void calculateOnePairEnergyTerm(int index, int atom1, int atom2, ThreadData& data, float* posq, double** atomParameters,
+    void calculateOnePairEnergyTerm(int index, int atom1, int atom2, ThreadData& data, float* posq, std::vector<double>* atomParameters,
                               float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);

    /**
@@ -154,7 +154,7 @@ private:
     * @param forces           forces on atoms are added to this
     */

-    void calculateChainRuleForces(ThreadData& data, int numAtoms, float* posq, double** atomParameters,
+    void calculateChainRuleForces(ThreadData& data, int numAtoms, float* posq, std::vector<double>* atomParameters,
                                    float* forces, const fvec4& boxSize, const fvec4& invBoxSize);

    /**
@@ -169,7 +169,7 @@ private:
     * @param isExcluded       specifies whether this is an excluded pair
     */

-    void calculateOnePairChainRule(int atom1, int atom2, ThreadData& data, float* posq, double** atomParameters,
+    void calculateOnePairChainRule(int atom1, int atom2, ThreadData& data, float* posq, std::vector<double>* atomParameters,
                               float* forces, bool isExcluded, const fvec4& boxSize, const fvec4& invBoxSize);

    /**
@@ -231,7 +231,7 @@ public:
     * @param energyParamDerivs  derivatives of the energy with respect to global parameters
     */

-    void calculateIxn(int numberOfAtoms, float* posq, double** atomParameters, std::map<std::string, double>& globalParameters,
+    void calculateIxn(int numberOfAtoms, float* posq, std::vector<std::vector<double> >& atomParameters, std::map<std::string, double>& globalParameters,
            std::vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs);
 };


--- a/platforms/cpu/include/CpuCustomManyParticleForce.h
+++ b/platforms/cpu/include/CpuCustomManyParticleForce.h

-/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2009-2018 Stanford University and Simbios.
 * Contributors: Peter Eastman
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -65,7 +65,7 @@ private:
    std::vector<ThreadData*> threadData;
    // The following variables are used to make information accessible to the individual threads.
    float* posq;
-    double** particleParameters;        
+    std::vector<double>* particleParameters;        
    const std::map<std::string, double>* globalParameters;
    std::vector<AlignedArray<float> >* threadForce;
    bool includeForces, includeEnergy;
@@ -81,7 +81,7 @@ private:
     * interaction for each one.
     */
    void loopOverInteractions(std::vector<int>& availableParticles, std::vector<int>& particleSet, int loopIndex, int startIndex,
-                              double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize);
+                              std::vector<double>* particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize);

    /**---------------------------------------------------------------------------------------

@@ -104,7 +104,7 @@ private:
     * @param boxSize            the size of the periodic box
     * @param invBoxSize         the inverse size of the periodic box
     */
-    void calculateOneIxn(std::vector<int>& particleSet, double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize);
+    void calculateOneIxn(std::vector<int>& particleSet, std::vector<double>* particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize);

    /**
     * Compute the displacement and squared distance between two points, optionally using
@@ -154,7 +154,7 @@ public:
     * @param includeEnergy      whether to compute energy
     * @param energy             the total energy is added to this
     */
-    void calculateIxn(AlignedArray<float>& posq, double** particleParameters, const std::map<std::string, double>& globalParameters,
+    void calculateIxn(AlignedArray<float>& posq, std::vector<std::vector<double> >& particleParameters, const std::map<std::string, double>& globalParameters,
                      std::vector<AlignedArray<float> >& threadForce, bool includeForces, bool includeEnergy, double& energy);
 };


--- a/platforms/cpu/include/CpuCustomNonbondedForce.h
+++ b/platforms/cpu/include/CpuCustomNonbondedForce.h

-/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2009-2018 Stanford University and Simbios.
 * Contributors: Peter Eastman
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -110,7 +110,6 @@ class CpuCustomNonbondedForce {
         @param posq             atom coordinates in float format
         @param atomCoordinates  atom coordinates
         @param atomParameters   atom parameters (charges, c6, c12, ...)     atomParameters[atomIndex][paramterIndex]
-         @param fixedParameters  non atom parameters (not currently used)
         @param globalParameters the values of global parameters
         @param forces           force array (forces added)
         @param totalEnergy      total energy
@@ -118,9 +117,9 @@ class CpuCustomNonbondedForce {

         --------------------------------------------------------------------------------------- */

-    void calculatePairIxn(int numberOfAtoms, float* posq, std::vector<OpenMM::Vec3>& atomCoordinates, double** atomParameters,
-                          double* fixedParameters, const std::map<std::string, double>& globalParameters,
-                          std::vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs);
+    void calculatePairIxn(int numberOfAtoms, float* posq, std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<std::vector<double> >& atomParameters,
+                          const std::map<std::string, double>& globalParameters, std::vector<AlignedArray<float> >& threadForce,
+                          bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs);
 private:
    class ThreadData;

@@ -144,7 +143,7 @@ private:
    int numberOfAtoms;
    float* posq;
    Vec3 const* atomCoordinates;
-    double** atomParameters;        
+    std::vector<double>* atomParameters;        
    const std::map<std::string, double>* globalParameters;
    std::vector<AlignedArray<float> >* threadForce;
    bool includeForce, includeEnergy;

--- a/platforms/cpu/include/CpuKernels.h
+++ b/platforms/cpu/include/CpuKernels.h
@@ -100,9 +100,8 @@ private:
 class CpuCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
 public:
    CpuCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) :
-            CalcHarmonicAngleForceKernel(name, platform), data(data), angleIndexArray(NULL), angleParamArray(NULL), usePeriodic(false) {
+            CalcHarmonicAngleForceKernel(name, platform), data(data), usePeriodic(false) {
    }
-    ~CpuCalcHarmonicAngleForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -129,8 +128,8 @@ public:
 private:
    CpuPlatform::PlatformData& data;
    int numAngles;
-    int **angleIndexArray;
-    double **angleParamArray;
+    std::vector<std::vector<int> > angleIndexArray;
+    std::vector<std::vector<double> > angleParamArray;
    CpuBondForce bondForce;
    bool usePeriodic;
 };
@@ -141,9 +140,8 @@ private:
 class CpuCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
 public:
    CpuCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) :
-            CalcPeriodicTorsionForceKernel(name, platform), data(data), torsionIndexArray(NULL), torsionParamArray(NULL), usePeriodic(false) {
+            CalcPeriodicTorsionForceKernel(name, platform), data(data), usePeriodic(false) {
    }
-    ~CpuCalcPeriodicTorsionForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -170,8 +168,8 @@ public:
 private:
    CpuPlatform::PlatformData& data;
    int numTorsions;
-    int **torsionIndexArray;
-    double **torsionParamArray;
+    std::vector<std::vector<int> > torsionIndexArray;
+    std::vector<std::vector<double> > torsionParamArray;
    CpuBondForce bondForce;
    bool usePeriodic;
 };
@@ -182,9 +180,8 @@ private:
 class CpuCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
 public:
    CpuCalcRBTorsionForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) :
-            CalcRBTorsionForceKernel(name, platform), data(data), torsionIndexArray(NULL), torsionParamArray(NULL), usePeriodic(false) {
+            CalcRBTorsionForceKernel(name, platform), data(data), usePeriodic(false) {
    }
-    ~CpuCalcRBTorsionForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -211,8 +208,8 @@ public:
 private:
    CpuPlatform::PlatformData& data;
    int numTorsions;
-    int **torsionIndexArray;
-    double **torsionParamArray;
+    std::vector<std::vector<int> > torsionIndexArray;
+    std::vector<std::vector<double> > torsionParamArray;
    CpuBondForce bondForce;
    bool usePeriodic;
 };
@@ -272,8 +269,8 @@ private:
    void computeParameters(ContextImpl& context, bool offsetsOnly);
    CpuPlatform::PlatformData& data;
    int numParticles, num14, posqIndex;
-    int **bonded14IndexArray;
-    double **bonded14ParamArray;
+    std::vector<std::vector<int> > bonded14IndexArray;
+    std::vector<std::vector<double> > bonded14ParamArray;
    double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient;
    int kmax[3], gridSize[3], dispersionGridSize[3];
    bool useSwitchingFunction, useOptimizedPme, hasInitializedPme, hasInitializedDispersionPme, hasParticleOffsets, hasExceptionOffsets;
@@ -324,7 +321,7 @@ public:
 private:   
    CpuPlatform::PlatformData& data;
    int numParticles;
-    double **particleParamArray;
+    std::vector<std::vector<double> > particleParamArray;
    double nonbondedCutoff, switchingDistance, periodicBoxSize[3], longRangeCoefficient;
    bool useSwitchingFunction, hasInitializedLongRangeCorrection;
    CustomNonbondedForce* forceCopy;
@@ -413,7 +410,7 @@ private:
    CpuPlatform::PlatformData& data;
    int numParticles;
    bool isPeriodic;
-    double **particleParamArray;
+    std::vector<std::vector<double> > particleParamArray;
    double nonbondedCutoff;
    CpuCustomGBForce* ixn;
    CpuNeighborList* neighborList;
@@ -460,7 +457,7 @@ private:
    CpuPlatform::PlatformData& data;
    int numParticles;
    double cutoffDistance;
-    double **particleParamArray;
+    std::vector<std::vector<double> > particleParamArray;
    CpuCustomManyParticleForce* ixn;
    std::vector<std::string> globalParameterNames;
    NonbondedMethod nonbondedMethod;

--- a/platforms/cpu/src/CpuBondForce.cpp
+++ b/platforms/cpu/src/CpuBondForce.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2014-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2014-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -38,10 +38,10 @@ using namespace std;
 CpuBondForce::CpuBondForce() {
 }

-void CpuBondForce::initialize(int numAtoms, int numBonds, int numAtomsPerBond, int** bondAtoms, ThreadPool& threads) {
+void CpuBondForce::initialize(int numAtoms, int numBonds, int numAtomsPerBond, vector<vector<int> >& bondAtoms, ThreadPool& threads) {
    this->numBonds = numBonds;
    this->numAtomsPerBond = numAtomsPerBond;
-    this->bondAtoms = bondAtoms;
+    this->bondAtoms = &bondAtoms[0];
    this->threads = &threads;
    int numThreads = threads.getNumThreads();
    int targetBondsPerThread = numBonds/numThreads;
@@ -164,7 +164,7 @@ void CpuBondForce::assignBond(int bond, int thread, vector<int>& atomThread, vec
    }
 }

-void CpuBondForce::calculateForce(vector<Vec3>& atomCoordinates, double** parameters, vector<Vec3>& forces, 
+void CpuBondForce::calculateForce(vector<Vec3>& atomCoordinates, vector<vector<double> >& parameters, vector<Vec3>& forces, 
        double* totalEnergy, ReferenceBondIxn& referenceBondIxn) {
    // Have the worker threads compute their forces.
    
@@ -189,7 +189,7 @@ void CpuBondForce::calculateForce(vector<Vec3>& atomCoordinates, double** parame
            *totalEnergy += threadEnergy[i];
 }

-void CpuBondForce::threadComputeForce(ThreadPool& threads, int threadIndex, vector<Vec3>& atomCoordinates, double** parameters, vector<Vec3>& forces, 
+void CpuBondForce::threadComputeForce(ThreadPool& threads, int threadIndex, vector<Vec3>& atomCoordinates, vector<vector<double> >& parameters, vector<Vec3>& forces, 
            double* totalEnergy, ReferenceBondIxn& referenceBondIxn) {
    vector<int>& bonds = threadBonds[threadIndex];
    int numBonds = bonds.size();

--- a/platforms/cpu/src/CpuCustomGBForce.cpp
+++ b/platforms/cpu/src/CpuCustomGBForce.cpp

-/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2009-2018 Stanford University and Simbios.
 * Contributors: Peter Eastman
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -178,14 +178,14 @@ void CpuCustomGBForce::setPeriodic(Vec3& boxSize) {
    periodicBoxSize[2] = boxSize[2];
  }

-void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, double** atomParameters,
+void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, vector<vector<double> >& atomParameters,
                                           map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
                                           bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
    // Record the parameters for the threads.
    
    this->numberOfAtoms = numberOfAtoms;
    this->posq = posq;
-    this->atomParameters = atomParameters;
+    this->atomParameters = &atomParameters[0];
    this->globalParameters = &globalParameters;
    this->threadForce = &threadForce;
    this->includeForce = includeForce;
@@ -352,7 +352,7 @@ void CpuCustomGBForce::threadComputeForce(ThreadPool& threads, int threadIndex)
    calculateChainRuleForces(data, numberOfAtoms, posq, atomParameters, forces, boxSize, invBoxSize);
 }

-void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, int numAtoms, float* posq, double** atomParameters,
+void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, int numAtoms, float* posq, vector<double>* atomParameters,
        bool useExclusions, const fvec4& boxSize, const fvec4& invBoxSize) {
    for (int i = 0; i < numAtoms; i++)
        values[index][i] = 0.0f;
@@ -399,7 +399,7 @@ void CpuCustomGBForce::calculateParticlePairValue(int index, ThreadData& data, i
    }
 }

-void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, ThreadData& data, float* posq, double** atomParameters,
+void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, ThreadData& data, float* posq, vector<double>* atomParameters,
        vector<float>& valueArray, const fvec4& boxSize, const fvec4& invBoxSize) {
    fvec4 deltaR;
    fvec4 pos1(posq+4*atom1);
@@ -426,7 +426,7 @@ void CpuCustomGBForce::calculateOnePairValue(int index, int atom1, int atom2, Th
 }

 void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq,
-        double** atomParameters, float* forces, double& totalEnergy) {
+        vector<double>* atomParameters, float* forces, double& totalEnergy) {
    for (int i = data.firstAtom; i < data.lastAtom; i++) {
        data.x = posq[4*i];
        data.y = posq[4*i+1];
@@ -450,7 +450,7 @@ void CpuCustomGBForce::calculateSingleParticleEnergyTerm(int index, ThreadData&
    }
 }

-void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq, double** atomParameters,
+void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& data, int numAtoms, float* posq, vector<double>* atomParameters,
        bool useExclusions, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
    if (cutoff) {
        // Loop over all pairs in the neighbor list.
@@ -492,7 +492,7 @@ void CpuCustomGBForce::calculateParticlePairEnergyTerm(int index, ThreadData& da
    }
 }

-void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom2, ThreadData& data, float* posq, double** atomParameters,
+void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom2, ThreadData& data, float* posq, vector<double>* atomParameters,
        float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
    // Compute the displacement.

@@ -537,7 +537,7 @@ void CpuCustomGBForce::calculateOnePairEnergyTerm(int index, int atom1, int atom
        data.energyParamDerivs[i] += data.energyParamDerivExpressions[index][i].evaluate();
 }

-void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, float* posq, double** atomParameters,
+void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms, float* posq, vector<double>* atomParameters,
        float* forces, const fvec4& boxSize, const fvec4& invBoxSize) {
    if (cutoff) {
        // Loop over all pairs in the neighbor list.
@@ -614,7 +614,7 @@ void CpuCustomGBForce::calculateChainRuleForces(ThreadData& data, int numAtoms,
                data.energyParamDerivs[k] += dEdV[j][i]*dValuedParam[j][k][i];
 }

-void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadData& data, float* posq, double** atomParameters,
+void CpuCustomGBForce::calculateOnePairChainRule(int atom1, int atom2, ThreadData& data, float* posq, vector<double>* atomParameters,
        float* forces, bool isExcluded, const fvec4& boxSize, const fvec4& invBoxSize) {
    // Compute the displacement.


--- a/platforms/cpu/src/CpuCustomManyParticleForce.cpp
+++ b/platforms/cpu/src/CpuCustomManyParticleForce.cpp

-/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
+/* Portions copyright (c) 2009-2018 Stanford University and Simbios.
 * Contributors: Peter Eastman
 *
 * Permission is hereby granted, free of charge, to any person obtaining
@@ -88,13 +88,13 @@ CpuCustomManyParticleForce::~CpuCustomManyParticleForce() {
        delete data;
 }

-void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, double** particleParameters,
+void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, vector<vector<double> >& particleParameters,
                                                  const map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
                                                  bool includeForces, bool includeEnergy, double& energy) {
    // Record the parameters for the threads.
    
    this->posq = &posq[0];
-    this->particleParameters = particleParameters;
+    this->particleParameters = &particleParameters[0];
    this->globalParameters = &globalParameters;
    this->threadForce = &threadForce;
    this->includeForces = includeForces;
@@ -209,7 +209,7 @@ void CpuCustomManyParticleForce::setPeriodic(Vec3* periodicBoxVectors) {
 }

 void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availableParticles, vector<int>& particleSet, int loopIndex, int startIndex,
-                                                          double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
+                                                      vector<double>* particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
    int numParticles = availableParticles.size();
    double cutoff2 = cutoffDistance*cutoffDistance;
    int checkRange = (centralParticleMode ? 1 : loopIndex);
@@ -243,7 +243,7 @@ void CpuCustomManyParticleForce::loopOverInteractions(vector<int>& availablePart
    }
 }

-void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, double** particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
+void CpuCustomManyParticleForce::calculateOneIxn(vector<int>& particleSet, vector<double>* particleParameters, float* forces, ThreadData& data, const fvec4& boxSize, const fvec4& invBoxSize) {
    // Select the ordering to use for the particles.
    
    vector<int>& permutedParticles = data.permutedParticles;

--- a/platforms/cpu/src/CpuCustomNonbondedForce.cpp
+++ b/platforms/cpu/src/CpuCustomNonbondedForce.cpp
@@ -120,15 +120,15 @@ void CpuCustomNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
 }


-void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<Vec3>& atomCoordinates, double** atomParameters,
-                                               double* fixedParameters, const map<string, double>& globalParameters,
-                                               vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
+void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, vector<Vec3>& atomCoordinates, vector<vector<double> >& atomParameters,
+                                               const map<string, double>& globalParameters, vector<AlignedArray<float> >& threadForce,
+                                               bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs) {
    // Record the parameters for the threads.
    
    this->numberOfAtoms = numberOfAtoms;
    this->posq = posq;
    this->atomCoordinates = &atomCoordinates[0];
-    this->atomParameters = atomParameters;
+    this->atomParameters = &atomParameters[0];
    this->globalParameters = &globalParameters;
    this->threadForce = &threadForce;
    this->includeForce = includeForce;

--- a/platforms/cpu/src/CpuKernels.cpp
+++ b/platforms/cpu/src/CpuKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2016 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -302,25 +302,10 @@ double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, boo
    return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
 }

-CpuCalcHarmonicAngleForceKernel::~CpuCalcHarmonicAngleForceKernel() {
-    if (angleIndexArray != NULL) {
-        for (int i = 0; i < numAngles; i++) {
-            delete[] angleIndexArray[i];
-            delete[] angleParamArray[i];
-        }
-        delete[] angleIndexArray;
-        delete[] angleParamArray;
-    }
-}
-
 void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
    numAngles = force.getNumAngles();
-    angleIndexArray = new int*[numAngles];
-    for (int i = 0; i < numAngles; i++)
-        angleIndexArray[i] = new int[3];
-    angleParamArray = new double*[numAngles];
-    for (int i = 0; i < numAngles; i++)
-        angleParamArray[i] = new double[2];
+    angleIndexArray.resize(numAngles, vector<int>(3));
+    angleParamArray.resize(numAngles, vector<double>(2));
    for (int i = 0; i < numAngles; ++i) {
        int particle1, particle2, particle3;
        double angle, k;
@@ -363,25 +348,10 @@ void CpuCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& conte
    }
 }

-CpuCalcPeriodicTorsionForceKernel::~CpuCalcPeriodicTorsionForceKernel() {
-    if (torsionIndexArray != NULL) {
-        for (int i = 0; i < numTorsions; i++) {
-            delete[] torsionIndexArray[i];
-            delete[] torsionParamArray[i];
-        }
-        delete[] torsionIndexArray;
-        delete[] torsionParamArray;
-    }
-}
-
 void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
    numTorsions = force.getNumTorsions();
-    torsionIndexArray = new int*[numTorsions];
-    for (int i = 0; i < numTorsions; i++)
-        torsionIndexArray[i] = new int[4];
-    torsionParamArray = new double*[numTorsions];
-    for (int i = 0; i < numTorsions; i++)
-        torsionParamArray[i] = new double[3];
+    torsionIndexArray.resize(numTorsions, vector<int>(4));
+    torsionParamArray.resize(numTorsions, vector<double>(3));
    for (int i = 0; i < numTorsions; ++i) {
        int particle1, particle2, particle3, particle4, periodicity;
        double phase, k;
@@ -427,25 +397,10 @@ void CpuCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& con
    }
 }

-CpuCalcRBTorsionForceKernel::~CpuCalcRBTorsionForceKernel() {
-    if (torsionIndexArray != NULL) {
-        for (int i = 0; i < numTorsions; i++) {
-            delete[] torsionIndexArray[i];
-            delete[] torsionParamArray[i];
-        }
-        delete[] torsionIndexArray;
-        delete[] torsionParamArray;
-    }
-}
-
 void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
    numTorsions = force.getNumTorsions();
-    torsionIndexArray = new int*[numTorsions];
-    for (int i = 0; i < numTorsions; i++)
-        torsionIndexArray[i] = new int[4];
-    torsionParamArray = new double*[numTorsions];
-    for (int i = 0; i < numTorsions; i++)
-        torsionParamArray[i] = new double[6];
+    torsionIndexArray.resize(numTorsions, vector<int>(4));
+    torsionParamArray.resize(numTorsions, vector<double>(6));
    for (int i = 0; i < numTorsions; ++i) {
        int particle1, particle2, particle3, particle4;
        double c0, c1, c2, c3, c4, c5;
@@ -522,7 +477,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
 CpuNonbondedForce* createCpuNonbondedForceVec8();

 CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
-        data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
+        data(data), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
    if (isVec8Supported())
        nonbonded = createCpuNonbondedForceVec8();
    else
@@ -530,14 +485,6 @@ CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Plat
 }

 CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
-    if (bonded14ParamArray != NULL) {
-        for (int i = 0; i < num14; i++) {
-            delete[] bonded14IndexArray[i];
-            delete[] bonded14ParamArray[i];
-        }
-        delete[] bonded14IndexArray;
-        delete[] bonded14ParamArray;
-    }
    if (nonbonded != NULL)
        delete nonbonded;
 }
@@ -563,12 +510,8 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
    // Record the particle parameters.

    num14 = nb14s.size();
-    bonded14IndexArray = new int*[num14];
-    for (int i = 0; i < num14; i++)
-        bonded14IndexArray[i] = new int[2];
-    bonded14ParamArray = new double*[num14];
-    for (int i = 0; i < num14; i++)
-        bonded14ParamArray[i] = new double[3];
+    bonded14IndexArray.resize(num14, vector<int>(2));
+    bonded14ParamArray.resize(num14, vector<double>(3));
    particleParams.resize(numParticles);
    charges.resize(numParticles);
    C6params.resize(numParticles);
@@ -871,11 +814,6 @@ CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name
 }

 CpuCalcCustomNonbondedForceKernel::~CpuCalcCustomNonbondedForceKernel() {
-    if (particleParamArray != NULL) {
-        for (int i = 0; i < numParticles; i++)
-            delete[] particleParamArray[i];
-        delete[] particleParamArray;
-    }
    if (nonbonded != NULL)
        delete nonbonded;
    if (forceCopy != NULL)
@@ -898,15 +836,9 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
    // Build the arrays.

    int numParameters = force.getNumPerParticleParameters();
-    particleParamArray = new double*[numParticles];
-    for (int i = 0; i < numParticles; i++)
-        particleParamArray[i] = new double[numParameters];
-    for (int i = 0; i < numParticles; ++i) {
-        vector<double> parameters;
-        force.getParticleParameters(i, parameters);
-        for (int j = 0; j < numParameters; j++)
-            particleParamArray[i][j] = parameters[j];
-    }
+    particleParamArray.resize(numParticles);
+    for (int i = 0; i < numParticles; ++i)
+        force.getParticleParameters(i, particleParamArray[i]);
    nonbondedMethod = CalcCustomNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
    nonbondedCutoff = force.getCutoffDistance();
    if (nonbondedMethod == NoCutoff)
@@ -1002,7 +934,7 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc
    if (useSwitchingFunction)
        nonbonded->setUseSwitchingFunction(switchingDistance);
    vector<double> energyParamDerivValues(energyParamDerivNames.size()+1, 0.0);
-    nonbonded->calculatePairIxn(numParticles, &data.posq[0], posData, particleParamArray, 0, globalParamValues, data.threadForce, includeForces, includeEnergy, energy, &energyParamDerivValues[0]);
+    nonbonded->calculatePairIxn(numParticles, &data.posq[0], posData, particleParamArray, globalParamValues, data.threadForce, includeForces, includeEnergy, energy, &energyParamDerivValues[0]);
    map<string, double>& energyParamDerivs = extractEnergyParameterDerivatives(context);
    for (int i = 0; i < energyParamDerivNames.size(); i++)
        energyParamDerivs[energyParamDerivNames[i]] += energyParamDerivValues[i];
@@ -1099,11 +1031,6 @@ void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, co
 }

 CpuCalcCustomGBForceKernel::~CpuCalcCustomGBForceKernel() {
-    if (particleParamArray != NULL) {
-        for (int i = 0; i < numParticles; i++)
-            delete[] particleParamArray[i];
-        delete[] particleParamArray;
-    }
    if (ixn != NULL)
        delete ixn;
    if (neighborList != NULL)
@@ -1138,15 +1065,9 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
    // Build the arrays.

    int numPerParticleParameters = force.getNumPerParticleParameters();
-    particleParamArray = new double*[numParticles];
-    for (int i = 0; i < numParticles; i++)
-        particleParamArray[i] = new double[numPerParticleParameters];
-    for (int i = 0; i < numParticles; ++i) {
-        vector<double> parameters;
-        force.getParticleParameters(i, parameters);
-        for (int j = 0; j < numPerParticleParameters; j++)
-            particleParamArray[i][j] = parameters[j];
-    }
+    particleParamArray.resize(numParticles);
+    for (int i = 0; i < numParticles; ++i)
+        force.getParticleParameters(i, particleParamArray[i]);
    for (int i = 0; i < numPerParticleParameters; i++)
        particleParameterNames.push_back(force.getPerParticleParameterName(i));
    for (int i = 0; i < force.getNumGlobalParameters(); i++)
@@ -1292,11 +1213,6 @@ void CpuCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context, c
 }

 CpuCalcCustomManyParticleForceKernel::~CpuCalcCustomManyParticleForceKernel() {
-    if (particleParamArray != NULL) {
-        for (int i = 0; i < numParticles; i++)
-            delete[] particleParamArray[i];
-        delete[] particleParamArray;
-    }
    if (ixn != NULL)
        delete ixn;
 }
@@ -1307,15 +1223,10 @@ void CpuCalcCustomManyParticleForceKernel::initialize(const System& system, cons

    numParticles = system.getNumParticles();
    int numParticleParameters = force.getNumPerParticleParameters();
-    particleParamArray = new double*[numParticles];
-    for (int i = 0; i < numParticles; i++)
-        particleParamArray[i] = new double[numParticleParameters];
+    particleParamArray.resize(numParticles);
    for (int i = 0; i < numParticles; ++i) {
-        vector<double> parameters;
        int type;
-        force.getParticleParameters(i, parameters, type);
-        for (int j = 0; j < numParticleParameters; j++)
-            particleParamArray[i][j] = parameters[j];
+        force.getParticleParameters(i, particleParamArray[i], type);
    }
    for (int i = 0; i < force.getNumGlobalParameters(); i++)
        globalParameterNames.push_back(force.getGlobalParameterName(i));

--- a/platforms/cuda/include/CudaArray.h
+++ b/platforms/cuda/include/CudaArray.h
@@ -130,7 +130,27 @@ public:
     * Copy the values in a vector to the device memory.
     */
    template <class T>
-    void upload(const std::vector<T>& data) {
+    void upload(const std::vector<T>& data, bool convert = true) {
+        if (convert && data.size() == size && sizeof(T) != elementSize) {
+            if (sizeof(T) == 2*elementSize) {
+                // Convert values from double to single precision.
+                const double* d = reinterpret_cast<const double*>(&data[0]);
+                std::vector<float> v(elementSize*size/sizeof(float));
+                for (int i = 0; i < v.size(); i++)
+                    v[i] = (float) d[i];
+                upload(&v[0], true);
+                return;
+            }
+            if (2*sizeof(T) == elementSize) {
+                // Convert values from single to double precision.
+                const float* d = reinterpret_cast<const float*>(&data[0]);
+                std::vector<double> v(elementSize*size/sizeof(double));
+                for (int i = 0; i < v.size(); i++)
+                    v[i] = (double) d[i];
+                upload(&v[0], true);
+                return;
+            }
+        }
        if (sizeof(T) != elementSize || data.size() != size)
            throw OpenMMException("Error uploading array "+name+": The specified vector does not match the size of the array");
        upload(&data[0], true);

--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
@@ -1602,10 +1602,8 @@ private:
    mutable std::vector<std::vector<double4> > localPerDofValuesDouble;
    std::map<std::string, double> energyParamDerivs;
    std::vector<std::string> perDofEnergyParamDerivNames;
-    std::vector<float> localPerDofEnergyParamDerivsFloat;
-    std::vector<double> localPerDofEnergyParamDerivsDouble;
-    std::vector<float> globalValuesFloat;
-    std::vector<double> globalValuesDouble;
+    std::vector<double> localPerDofEnergyParamDerivs;
+    std::vector<double> localGlobalValues;
    std::vector<double> initialGlobalVariables;
    std::vector<std::vector<CUfunction> > kernels;
    std::vector<std::vector<std::vector<void*> > > kernelArgs;

--- a/platforms/cuda/src/CudaContext.cpp
+++ b/platforms/cuda/src/CudaContext.cpp
@@ -728,58 +728,9 @@ string CudaContext::intToString(int value) const {
 }

 std::string CudaContext::getErrorString(CUresult result) {
-    switch (result) {
-        case CUDA_SUCCESS: return "CUDA_SUCCESS";
-        case CUDA_ERROR_INVALID_VALUE: return "CUDA_ERROR_INVALID_VALUE";
-        case CUDA_ERROR_OUT_OF_MEMORY: return "CUDA_ERROR_OUT_OF_MEMORY";
-        case CUDA_ERROR_NOT_INITIALIZED: return "CUDA_ERROR_NOT_INITIALIZED";
-        case CUDA_ERROR_DEINITIALIZED: return "CUDA_ERROR_DEINITIALIZED";
-        case CUDA_ERROR_PROFILER_DISABLED: return "CUDA_ERROR_PROFILER_DISABLED";
-        case CUDA_ERROR_PROFILER_NOT_INITIALIZED: return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
-        case CUDA_ERROR_PROFILER_ALREADY_STARTED: return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
-        case CUDA_ERROR_PROFILER_ALREADY_STOPPED: return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
-        case CUDA_ERROR_NO_DEVICE: return "CUDA_ERROR_NO_DEVICE";
-        case CUDA_ERROR_INVALID_DEVICE: return "CUDA_ERROR_INVALID_DEVICE";
-        case CUDA_ERROR_INVALID_IMAGE: return "CUDA_ERROR_INVALID_IMAGE";
-        case CUDA_ERROR_INVALID_CONTEXT: return "CUDA_ERROR_INVALID_CONTEXT";
-        case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
-        case CUDA_ERROR_MAP_FAILED: return "CUDA_ERROR_MAP_FAILED";
-        case CUDA_ERROR_UNMAP_FAILED: return "CUDA_ERROR_UNMAP_FAILED";
-        case CUDA_ERROR_ARRAY_IS_MAPPED: return "CUDA_ERROR_ARRAY_IS_MAPPED";
-        case CUDA_ERROR_ALREADY_MAPPED: return "CUDA_ERROR_ALREADY_MAPPED";
-        case CUDA_ERROR_NO_BINARY_FOR_GPU: return "CUDA_ERROR_NO_BINARY_FOR_GPU";
-        case CUDA_ERROR_ALREADY_ACQUIRED: return "CUDA_ERROR_ALREADY_ACQUIRED";
-        case CUDA_ERROR_NOT_MAPPED: return "CUDA_ERROR_NOT_MAPPED";
-        case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
-        case CUDA_ERROR_NOT_MAPPED_AS_POINTER: return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
-        case CUDA_ERROR_ECC_UNCORRECTABLE: return "CUDA_ERROR_ECC_UNCORRECTABLE";
-        case CUDA_ERROR_UNSUPPORTED_LIMIT: return "CUDA_ERROR_UNSUPPORTED_LIMIT";
-        case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
-        case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
-        case CUDA_ERROR_INVALID_SOURCE: return "CUDA_ERROR_INVALID_SOURCE";
-        case CUDA_ERROR_FILE_NOT_FOUND: return "CUDA_ERROR_FILE_NOT_FOUND";
-        case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
-        case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
-        case CUDA_ERROR_OPERATING_SYSTEM: return "CUDA_ERROR_OPERATING_SYSTEM";
-        case CUDA_ERROR_INVALID_HANDLE: return "CUDA_ERROR_INVALID_HANDLE";
-        case CUDA_ERROR_NOT_FOUND: return "CUDA_ERROR_NOT_FOUND";
-        case CUDA_ERROR_NOT_READY: return "CUDA_ERROR_NOT_READY";
-        case CUDA_ERROR_LAUNCH_FAILED: return "CUDA_ERROR_LAUNCH_FAILED";
-        case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
-        case CUDA_ERROR_LAUNCH_TIMEOUT: return "CUDA_ERROR_LAUNCH_TIMEOUT";
-        case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
-        case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
-        case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
-        case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
-        case CUDA_ERROR_CONTEXT_IS_DESTROYED: return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
-        case CUDA_ERROR_ASSERT: return "CUDA_ERROR_ASSERT";
-        case CUDA_ERROR_TOO_MANY_PEERS: return "CUDA_ERROR_TOO_MANY_PEERS";
-        case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
-        case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
-        case CUDA_ERROR_NOT_PERMITTED: return "CUDA_ERROR_NOT_PERMITTED";
-        case CUDA_ERROR_NOT_SUPPORTED: return "CUDA_ERROR_NOT_SUPPORTED";
-        case CUDA_ERROR_UNKNOWN: return "CUDA_ERROR_UNKNOWN";
-    }
+    const char* message;
+    if (cuGetErrorName(result, &message) == CUDA_SUCCESS)
+        return string(message);
    return "CUDA error";
 }

@@ -886,18 +837,10 @@ double CudaContext::reduceEnergy() {
 void CudaContext::setCharges(const vector<double>& charges) {
    if (!chargeBuffer.isInitialized())
        chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
-    if (getUseDoublePrecision()) {
    vector<double> c(numAtoms);
    for (int i = 0; i < numAtoms; i++)
        c[i] = charges[i];
-        chargeBuffer.upload(c);
-    }
-    else {
-        vector<float> c(numAtoms);
-        for (int i = 0; i < numAtoms; i++)
-            c[i] = (float) charges[i];
-        chargeBuffer.upload(c);
-    }
+    chargeBuffer.upload(c, true);
    void* args[] = {&chargeBuffer.getDevicePointer(), &posq.getDevicePointer(), &atomIndexDevice.getDevicePointer(), &numAtoms};
    executeKernel(setChargesKernel, args, numAtoms);
 }
@@ -916,32 +859,33 @@ public:
    VirtualSiteInfo(const System& system) {
        for (int i = 0; i < system.getNumParticles(); i++) {
            if (system.isVirtualSite(i)) {
-                siteTypes.push_back(&typeid(system.getVirtualSite(i)));
+                const VirtualSite& vsite = system.getVirtualSite(i);
+                siteTypes.push_back(&typeid(vsite));
                vector<int> particles;
                particles.push_back(i);
-                for (int j = 0; j < system.getVirtualSite(i).getNumParticles(); j++)
-                    particles.push_back(system.getVirtualSite(i).getParticle(j));
+                for (int j = 0; j < vsite.getNumParticles(); j++)
+                    particles.push_back(vsite.getParticle(j));
                siteParticles.push_back(particles);
                vector<double> weights;
-                if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
+                if (dynamic_cast<const TwoParticleAverageSite*>(&vsite) != NULL) {
                    // A two particle average.

-                    const TwoParticleAverageSite& site = dynamic_cast<const TwoParticleAverageSite&>(system.getVirtualSite(i));
+                    const TwoParticleAverageSite& site = dynamic_cast<const TwoParticleAverageSite&>(vsite);
                    weights.push_back(site.getWeight(0));
                    weights.push_back(site.getWeight(1));
                }
-                else if (dynamic_cast<const ThreeParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
+                else if (dynamic_cast<const ThreeParticleAverageSite*>(&vsite) != NULL) {
                    // A three particle average.

-                    const ThreeParticleAverageSite& site = dynamic_cast<const ThreeParticleAverageSite&>(system.getVirtualSite(i));
+                    const ThreeParticleAverageSite& site = dynamic_cast<const ThreeParticleAverageSite&>(vsite);
                    weights.push_back(site.getWeight(0));
                    weights.push_back(site.getWeight(1));
                    weights.push_back(site.getWeight(2));
                }
-                else if (dynamic_cast<const OutOfPlaneSite*>(&system.getVirtualSite(i)) != NULL) {
+                else if (dynamic_cast<const OutOfPlaneSite*>(&vsite) != NULL) {
                    // An out of plane site.

-                    const OutOfPlaneSite& site = dynamic_cast<const OutOfPlaneSite&>(system.getVirtualSite(i));
+                    const OutOfPlaneSite& site = dynamic_cast<const OutOfPlaneSite&>(vsite);
                    weights.push_back(site.getWeight12());
                    weights.push_back(site.getWeight13());
                    weights.push_back(site.getWeightCross());

--- a/platforms/cuda/src/CudaIntegrationUtilities.cpp
+++ b/platforms/cuda/src/CudaIntegrationUtilities.cpp
@@ -376,12 +376,12 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
        vector<int> atomConstraintsVec(ccmaAtomConstraints.getSize());
        vector<int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
        vector<int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
-        if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
-            ccmaDistance.initialize<double4>(context, numCCMA, "CcmaDistance");
-            ccmaDelta1.initialize<double>(context, numCCMA, "CcmaDelta1");
-            ccmaDelta2.initialize<double>(context, numCCMA, "CcmaDelta2");
-            ccmaReducedMass.initialize<double>(context, numCCMA, "CcmaReducedMass");
-            ccmaConstraintMatrixValue.initialize<double>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
+        int elementSize = (context.getUseDoublePrecision() || context.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
+        ccmaDistance.initialize(context, numCCMA, 4*elementSize, "CcmaDistance");
+        ccmaDelta1.initialize(context, numCCMA, elementSize, "CcmaDelta1");
+        ccmaDelta2.initialize(context, numCCMA, elementSize, "CcmaDelta2");
+        ccmaReducedMass.initialize(context, numCCMA, elementSize, "CcmaReducedMass");
+        ccmaConstraintMatrixValue.initialize(context, numCCMA*maxRowElements, elementSize, "ConstraintMatrixValue");
        vector<double4> distanceVec(ccmaDistance.getSize());
        vector<double> reducedMassVec(ccmaReducedMass.getSize());
        vector<double> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
@@ -398,36 +398,9 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
            }
            constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
        }
-            ccmaDistance.upload(distanceVec);
-            ccmaReducedMass.upload(reducedMassVec);
-            ccmaConstraintMatrixValue.upload(constraintMatrixValueVec);
-        }
-        else {
-            ccmaDistance.initialize<float4>(context, numCCMA, "CcmaDistance");
-            ccmaDelta1.initialize<float>(context, numCCMA, "CcmaDelta1");
-            ccmaDelta2.initialize<float>(context, numCCMA, "CcmaDelta2");
-            ccmaReducedMass.initialize<float>(context, numCCMA, "CcmaReducedMass");
-            ccmaConstraintMatrixValue.initialize<float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
-            vector<float4> distanceVec(ccmaDistance.getSize());
-            vector<float> reducedMassVec(ccmaReducedMass.getSize());
-            vector<float> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
-            for (int i = 0; i < numCCMA; i++) {
-                int index = constraintOrder[i];
-                int c = ccmaConstraints[index];
-                atomsVec[i].x = atom1[c];
-                atomsVec[i].y = atom2[c];
-                distanceVec[i].w = (float) distance[c];
-                reducedMassVec[i] = (float) (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
-                for (unsigned int j = 0; j < matrix[index].size(); j++) {
-                    constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
-                    constraintMatrixValueVec[i+j*numCCMA] = (float) matrix[index][j].second;
-                }
-                constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
-            }
-            ccmaDistance.upload(distanceVec);
-            ccmaReducedMass.upload(reducedMassVec);
-            ccmaConstraintMatrixValue.upload(constraintMatrixValueVec);
-        }
+        ccmaDistance.upload(distanceVec, true);
+        ccmaReducedMass.upload(reducedMassVec, true);
+        ccmaConstraintMatrixValue.upload(constraintMatrixValueVec, true);
        for (unsigned int i = 0; i < atomConstraints.size(); i++) {
            numAtomConstraintsVec[i] = atomConstraints[i].size();
            for (unsigned int j = 0; j < atomConstraints[i].size(); j++) {
@@ -521,57 +494,21 @@ CudaIntegrationUtilities::CudaIntegrationUtilities(CudaContext& context, const S
        vsiteLocalCoordsAtoms.upload(vsiteLocalCoordsAtomVec);
        vsiteLocalCoordsStartIndex.upload(vsiteLocalCoordsStartVec);
    }
-    if (context.getUseDoublePrecision()) {
-        vsite2AvgWeights.initialize<double2>(context, max(1, num2Avg), "vsite2AvgWeights");
-        vsite3AvgWeights.initialize<double4>(context, max(1, num3Avg), "vsite3AvgWeights");
-        vsiteOutOfPlaneWeights.initialize<double4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
-        vsiteLocalCoordsWeights.initialize<double>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
-        vsiteLocalCoordsPos.initialize<double4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
+    int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
+    vsite2AvgWeights.initialize(context, max(1, num2Avg), 2*elementSize, "vsite2AvgWeights");
+    vsite3AvgWeights.initialize(context, max(1, num3Avg), 4*elementSize, "vsite3AvgWeights");
+    vsiteOutOfPlaneWeights.initialize(context, max(1, numOutOfPlane), 4*elementSize, "vsiteOutOfPlaneWeights");
+    vsiteLocalCoordsWeights.initialize(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), elementSize, "vsiteLocalCoordsWeights");
+    vsiteLocalCoordsPos.initialize(context, max(1, (int) vsiteLocalCoordsPosVec.size()), 4*elementSize, "vsiteLocalCoordsPos");
    if (num2Avg > 0)
-            vsite2AvgWeights.upload(vsite2AvgWeightVec);
+        vsite2AvgWeights.upload(vsite2AvgWeightVec, true);
    if (num3Avg > 0)
-            vsite3AvgWeights.upload(vsite3AvgWeightVec);
+        vsite3AvgWeights.upload(vsite3AvgWeightVec, true);
    if (numOutOfPlane > 0)
-            vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec);
-        if (numLocalCoords > 0) {
-            vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec);
-            vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec);
-        }
-    }
-    else {
-        vsite2AvgWeights.initialize<float2>(context, max(1, num2Avg), "vsite2AvgWeights");
-        vsite3AvgWeights.initialize<float4>(context, max(1, num3Avg), "vsite3AvgWeights");
-        vsiteOutOfPlaneWeights.initialize<float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
-        vsiteLocalCoordsWeights.initialize<float>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
-        vsiteLocalCoordsPos.initialize<float4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
-        if (num2Avg > 0) {
-            vector<float2> floatWeights(num2Avg);
-            for (int i = 0; i < num2Avg; i++)
-                floatWeights[i] = make_float2((float) vsite2AvgWeightVec[i].x, (float) vsite2AvgWeightVec[i].y);
-            vsite2AvgWeights.upload(floatWeights);
-        }
-        if (num3Avg > 0) {
-            vector<float4> floatWeights(num3Avg);
-            for (int i = 0; i < num3Avg; i++)
-                floatWeights[i] = make_float4((float) vsite3AvgWeightVec[i].x, (float) vsite3AvgWeightVec[i].y, (float) vsite3AvgWeightVec[i].z, 0.0f);
-            vsite3AvgWeights.upload(floatWeights);
-        }
-        if (numOutOfPlane > 0) {
-            vector<float4> floatWeights(numOutOfPlane);
-            for (int i = 0; i < numOutOfPlane; i++)
-                floatWeights[i] = make_float4((float) vsiteOutOfPlaneWeightVec[i].x, (float) vsiteOutOfPlaneWeightVec[i].y, (float) vsiteOutOfPlaneWeightVec[i].z, 0.0f);
-            vsiteOutOfPlaneWeights.upload(floatWeights);
-        }
+        vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec, true);
    if (numLocalCoords > 0) {
-            vector<float> floatWeights(vsiteLocalCoordsWeightVec.size());
-            for (int i = 0; i < (int) vsiteLocalCoordsWeightVec.size(); i++)
-                floatWeights[i] = (float) vsiteLocalCoordsWeightVec[i];
-            vsiteLocalCoordsWeights.upload(floatWeights);
-            vector<float4> floatPos(vsiteLocalCoordsPosVec.size());
-            for (int i = 0; i < (int) vsiteLocalCoordsPosVec.size(); i++)
-                floatPos[i] = make_float4((float) vsiteLocalCoordsPosVec[i].x, (float) vsiteLocalCoordsPosVec[i].y, (float) vsiteLocalCoordsPosVec[i].z, 0.0f);
-            vsiteLocalCoordsPos.upload(floatPos);
-        }
+        vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec, true);
+        vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec, true);
    }

    // Create the kernels used by this class.

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -1909,25 +1909,12 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                        for (int i = 0; i < ndata; i++)
                            if (moduli[i] < 1.0e-7)
                                moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
-                        if (cu.getUseDoublePrecision()) {
-                            if (dim == 0)
-                                xmoduli->upload(moduli);
-                            else if (dim == 1)
-                                ymoduli->upload(moduli);
-                            else
-                                zmoduli->upload(moduli);
-                        }
-                        else {
-                            vector<float> modulif(ndata);
-                            for (int i = 0; i < ndata; i++)
-                                modulif[i] = (float) moduli[i];
                        if (dim == 0)
-                                xmoduli->upload(modulif);
+                            xmoduli->upload(moduli, true);
                        else if (dim == 1)
-                                ymoduli->upload(modulif);
+                            ymoduli->upload(moduli, true);
                        else
-                                zmoduli->upload(modulif);
-                        }
+                            zmoduli->upload(moduli, true);
                    }
                }
            }
@@ -2072,14 +2059,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
    }
    if (paramChanged) {
        recomputeParams = true;
-        if (cu.getUseDoublePrecision())
-            globalParams.upload(paramValues);
-        else {
-            vector<float> v(paramValues.size());
-            for (int i = 0; i < v.size(); i++)
-                v[i] = paramValues[i];
-            globalParams.upload(v);
-        }
+        globalParams.upload(paramValues, true);
    }
    double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
    if (recomputeParams || hasOffsets) {
@@ -2986,14 +2966,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
        chargeVec[i] = charge;
        paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
    }
-    if (cu.getUseDoublePrecision())
-        charges.upload(chargeVec);
-    else {
-        vector<float> c(charges.getSize());
-        for (int i = 0; i < c.size(); i++)
-            c[i] = (float) chargeVec[i];
-        charges.upload(c);
-    }
+    charges.upload(chargeVec, true);
    params.upload(paramsVector);
    prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
    surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
@@ -3141,14 +3114,7 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
    }
    for (int i = numParticles; i < cu.getPaddedNumAtoms(); i++)
        paramsVector[i] = make_float2(1, 1);
-    if (cu.getUseDoublePrecision())
-        charges.upload(chargeVector);
-    else {
-        vector<float> c(charges.getSize());
-        for (int i = 0; i < c.size(); i++)
-            c[i] = (float) chargeVector[i];
-        charges.upload(c);
-    }
+    charges.upload(chargeVector, true);
    params.upload(paramsVector);
    
    // Mark that the current reordering may be invalid.
@@ -4878,8 +4844,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
    
    numGroups = force.getNumGroups();
    vector<int> groupParticleVec;
-    vector<float> groupWeightVecFloat;
-    vector<double> groupWeightVecDouble;
+    vector<double> groupWeightVec;
    vector<int> groupOffsetVec;
    groupOffsetVec.push_back(0);
    for (int i = 0; i < numGroups; i++) {
@@ -4891,27 +4856,19 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
    }
    vector<vector<double> > normalizedWeights;
    CustomCentroidBondForceImpl::computeNormalizedWeights(force, system, normalizedWeights);
-    if (cu.getUseDoublePrecision()) {
    for (int i = 0; i < numGroups; i++)
-            groupWeightVecDouble.insert(groupWeightVecDouble.end(), normalizedWeights[i].begin(), normalizedWeights[i].end());
-    }
-    else {
-        for (int i = 0; i < numGroups; i++)
-            for (int j = 0; j < normalizedWeights[i].size(); j++)
-                groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
-    }
+        groupWeightVec.insert(groupWeightVec.end(), normalizedWeights[i].begin(), normalizedWeights[i].end());
    groupParticles.initialize<int>(cu, groupParticleVec.size(), "groupParticles");
    groupParticles.upload(groupParticleVec);
    if (cu.getUseDoublePrecision()) {
        groupWeights.initialize<double>(cu, groupParticleVec.size(), "groupWeights");
-        groupWeights.upload(groupWeightVecDouble);
        centerPositions.initialize<double4>(cu, numGroups, "centerPositions");
    }
    else {
        groupWeights.initialize<float>(cu, groupParticleVec.size(), "groupWeights");
-        groupWeights.upload(groupWeightVecFloat);
        centerPositions.initialize<float4>(cu, numGroups, "centerPositions");
    }
+    groupWeights.upload(groupWeightVec, true);
    groupOffsets.initialize<int>(cu, groupOffsetVec.size(), "groupOffsets");
    groupOffsets.upload(groupOffsetVec);
    groupForces.initialize<long long>(cu, numGroups*3, "groupForces");
@@ -6833,18 +6790,10 @@ void CudaCalcRMSDForceKernel::recordParameters(const RMSDForce& force) {
    // Upload them to the device.

    particles.upload(particleVec);
-    if (cu.getUseDoublePrecision()) {
    vector<double4> pos;
    for (Vec3 p : centeredPositions)
        pos.push_back(make_double4(p[0], p[1], p[2], 0));
-        referencePos.upload(pos);
-    }
-    else {
-        vector<float4> pos;
-        for (Vec3 p : centeredPositions)
-            pos.push_back(make_float4(p[0], p[1], p[2], 0));
-        referencePos.upload(pos);
-    }
+    referencePos.upload(pos, true);

    // Record the sum of the norms of the reference positions.

@@ -7027,20 +6976,11 @@ void CudaIntegrateLangevinStepKernel::execute(ContextImpl& context, const Langev
        double vscale = exp(-stepSize*friction);
        double fscale = (friction == 0 ? stepSize : (1-vscale)/friction);
        double noisescale = sqrt(kT*(1-vscale*vscale));
-        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
        vector<double> p(params.getSize());
        p[0] = vscale;
        p[1] = fscale;
        p[2] = noisescale;
-            params.upload(p);
-        }
-        else {
-            vector<float> p(params.getSize());
-            p[0] = (float) vscale;
-            p[1] = (float) fscale;
-            p[2] = (float) noisescale;
-            params.upload(p);
-        }
+        params.upload(p, true);
        prevTemp = temperature;
        prevFriction = friction;
        prevStepSize = stepSize;
@@ -7539,22 +7479,20 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
        
        // Allocate space for storing global values, both on the host and the device.
        
-        globalValuesFloat.resize(expressionSet.getNumVariables());
-        globalValuesDouble.resize(expressionSet.getNumVariables());
+        localGlobalValues.resize(expressionSet.getNumVariables());
        int elementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
        globalValues.initialize(cu, expressionSet.getNumVariables(), elementSize, "globalValues");
        for (int i = 0; i < integrator.getNumGlobalVariables(); i++) {
-            globalValuesDouble[globalVariableIndex[i]] = initialGlobalVariables[i];
+            localGlobalValues[globalVariableIndex[i]] = initialGlobalVariables[i];
            expressionSet.setVariable(globalVariableIndex[i], initialGlobalVariables[i]);
        }
        for (int i = 0; i < (int) parameterVariableIndex.size(); i++) {
            double value = context.getParameter(parameterNames[i]);
-            globalValuesDouble[parameterVariableIndex[i]] = value;
+            localGlobalValues[parameterVariableIndex[i]] = value;
            expressionSet.setVariable(parameterVariableIndex[i], value);
        }
        int numContextParams = context.getParameters().size();
-        localPerDofEnergyParamDerivsFloat.resize(numContextParams);
-        localPerDofEnergyParamDerivsDouble.resize(numContextParams);
+        localPerDofEnergyParamDerivs.resize(numContextParams);
        perDofEnergyParamDerivs.initialize(cu, max(1, numContextParams), elementSize, "perDofEnergyParamDerivs");
        
        // Record information about the targets of steps that will be stored in global variables.
@@ -7829,8 +7767,8 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
    recordGlobalValue(stepSize, GlobalTarget(DT, dtVariableIndex), integrator);
    for (int i = 0; i < (int) parameterNames.size(); i++) {
        double value = context.getParameter(parameterNames[i]);
-        if (value != globalValuesDouble[parameterVariableIndex[i]]) {
-            globalValuesDouble[parameterVariableIndex[i]] = value;
+        if (value != localGlobalValues[parameterVariableIndex[i]]) {
+            localGlobalValues[parameterVariableIndex[i]] = value;
            deviceGlobalsAreCurrent = false;
        }
    }
@@ -7927,16 +7865,9 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
                if (needsEnergyParamDerivs) {
                    context.getEnergyParameterDerivatives(energyParamDerivs);
                    if (perDofEnergyParamDerivNames.size() > 0) {
-                        if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
-                            for (int i = 0; i < perDofEnergyParamDerivNames.size(); i++)
-                                localPerDofEnergyParamDerivsDouble[i] = energyParamDerivs[perDofEnergyParamDerivNames[i]];
-                            perDofEnergyParamDerivs.upload(localPerDofEnergyParamDerivsDouble);
-                        }
-                        else {
                        for (int i = 0; i < perDofEnergyParamDerivNames.size(); i++)
-                                localPerDofEnergyParamDerivsFloat[i] = (float) energyParamDerivs[perDofEnergyParamDerivNames[i]];
-                            perDofEnergyParamDerivs.upload(localPerDofEnergyParamDerivsFloat);
-                        }
+                            localPerDofEnergyParamDerivs[i] = energyParamDerivs[perDofEnergyParamDerivNames[i]];
+                        perDofEnergyParamDerivs.upload(localPerDofEnergyParamDerivs, true);
                    }
                }
            }
@@ -7949,13 +7880,7 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
        if (needsGlobals[step] && !deviceGlobalsAreCurrent) {
            // Upload the global values to the device.
            
-            if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision())
-                globalValues.upload(globalValuesDouble);
-            else {
-                for (int j = 0; j < (int) globalValuesDouble.size(); j++)
-                    globalValuesFloat[j] = (float) globalValuesDouble[j];
-                globalValues.upload(globalValuesFloat);
-            }
+            globalValues.upload(localGlobalValues, true);
        }
        bool stepInvalidatesForces = invalidatesForces[step];
        if (stepType[step] == CustomIntegrator::ComputePerDof && !merged[step]) {
@@ -8102,17 +8027,17 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
 void CudaIntegrateCustomStepKernel::recordGlobalValue(double value, GlobalTarget target, CustomIntegrator& integrator) {
    switch (target.type) {
        case DT:
-            if (value != globalValuesDouble[dtVariableIndex])
+            if (value != localGlobalValues[dtVariableIndex])
                deviceGlobalsAreCurrent = false;
            expressionSet.setVariable(dtVariableIndex, value);
-            globalValuesDouble[dtVariableIndex] = value;
+            localGlobalValues[dtVariableIndex] = value;
            cu.getIntegrationUtilities().setNextStepSize(value);
            integrator.setStepSize(value);
            break;
        case VARIABLE:
        case PARAMETER:
            expressionSet.setVariable(target.variableIndex, value);
-            globalValuesDouble[target.variableIndex] = value;
+            localGlobalValues[target.variableIndex] = value;
            deviceGlobalsAreCurrent = false;
            break;
    }
@@ -8123,8 +8048,8 @@ void CudaIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context
        return;
    for (int i = 0; i < (int) parameterNames.size(); i++) {
        double value = context.getParameter(parameterNames[i]);
-        if (value != globalValuesDouble[parameterVariableIndex[i]])
-            context.setParameter(parameterNames[i], globalValuesDouble[parameterVariableIndex[i]]);
+        if (value != localGlobalValues[parameterVariableIndex[i]])
+            context.setParameter(parameterNames[i], localGlobalValues[parameterVariableIndex[i]]);
    }
 }

@@ -8137,7 +8062,7 @@ void CudaIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vec
    }
    values.resize(numGlobalVariables);
    for (int i = 0; i < numGlobalVariables; i++)
-        values[i] = globalValuesDouble[globalVariableIndex[i]];
+        values[i] = localGlobalValues[globalVariableIndex[i]];
 }

 void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
@@ -8150,7 +8075,7 @@ void CudaIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, con
        return;
    }
    for (int i = 0; i < numGlobalVariables; i++) {
-        globalValuesDouble[globalVariableIndex[i]] = values[i];
+        localGlobalValues[globalVariableIndex[i]] = values[i];
        expressionSet.setVariable(globalVariableIndex[i], values[i]);
    }
    deviceGlobalsAreCurrent = false;

--- a/platforms/opencl/include/OpenCLArray.h
+++ b/platforms/opencl/include/OpenCLArray.h
@@ -182,7 +182,27 @@ public:
     * Copy the values in a vector to the Buffer.
     */
    template <class T>
-    void upload(const std::vector<T>& data, bool blocking = true) {
+    void upload(const std::vector<T>& data, bool blocking = true, bool convert = false) {
+        if (convert && data.size() == size && sizeof(T) != elementSize) {
+            if (sizeof(T) == 2*elementSize) {
+                // Convert values from double to single precision.
+                const double* d = reinterpret_cast<const double*>(&data[0]);
+                std::vector<float> v(elementSize*size/sizeof(float));
+                for (int i = 0; i < v.size(); i++)
+                    v[i] = (float) d[i];
+                upload(&v[0], blocking);
+                return;
+            }
+            if (2*sizeof(T) == elementSize) {
+                // Convert values from single to double precision.
+                const float* d = reinterpret_cast<const float*>(&data[0]);
+                std::vector<double> v(elementSize*size/sizeof(double));
+                for (int i = 0; i < v.size(); i++)
+                    v[i] = (double) d[i];
+                upload(&v[0], blocking);
+                return;
+            }
+        }
        if (sizeof(T) != elementSize || data.size() != size)
            throw OpenMMException("Error uploading array "+name+": The specified vector does not match the size of the array");
        upload(&data[0], blocking);

--- a/platforms/opencl/include/OpenCLKernels.h
+++ b/platforms/opencl/include/OpenCLKernels.h
@@ -1591,10 +1591,8 @@ private:
    mutable std::vector<std::vector<mm_double4> > localPerDofValuesDouble;
    std::map<std::string, double> energyParamDerivs;
    std::vector<std::string> perDofEnergyParamDerivNames;
-    std::vector<cl_float> localPerDofEnergyParamDerivsFloat;
-    std::vector<cl_double> localPerDofEnergyParamDerivsDouble;
-    std::vector<float> globalValuesFloat;
-    std::vector<double> globalValuesDouble;
+    std::vector<cl_double> localPerDofEnergyParamDerivs;
+    std::vector<double> localGlobalValues;
    std::vector<double> initialGlobalVariables;
    std::vector<std::vector<cl::Kernel> > kernels;
    cl::Kernel randomKernel, kineticEnergyKernel, sumKineticEnergyKernel;

--- a/platforms/opencl/src/OpenCLContext.cpp
+++ b/platforms/opencl/src/OpenCLContext.cpp
@@ -769,18 +769,10 @@ double OpenCLContext::reduceEnergy() {
 void OpenCLContext::setCharges(const vector<double>& charges) {
    if (!chargeBuffer.isInitialized())
        chargeBuffer.initialize(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
-    if (getUseDoublePrecision()) {
    vector<double> c(numAtoms);
    for (int i = 0; i < numAtoms; i++)
        c[i] = charges[i];
-        chargeBuffer.upload(c);
-    }
-    else {
-        vector<float> c(numAtoms);
-        for (int i = 0; i < numAtoms; i++)
-            c[i] = (float) charges[i];
-        chargeBuffer.upload(c);
-    }
+    chargeBuffer.upload(c, true, true);
    setChargesKernel.setArg<cl::Buffer>(0, chargeBuffer.getDeviceBuffer());
    setChargesKernel.setArg<cl::Buffer>(1, posq.getDeviceBuffer());
    setChargesKernel.setArg<cl::Buffer>(2, atomIndexDevice.getDeviceBuffer());
@@ -802,32 +794,33 @@ public:
    VirtualSiteInfo(const System& system) : OpenCLForceInfo(0) {
        for (int i = 0; i < system.getNumParticles(); i++) {
            if (system.isVirtualSite(i)) {
-                siteTypes.push_back(&typeid(system.getVirtualSite(i)));
+                const VirtualSite& vsite = system.getVirtualSite(i);
+                siteTypes.push_back(&typeid(vsite));
                vector<int> particles;
                particles.push_back(i);
-                for (int j = 0; j < system.getVirtualSite(i).getNumParticles(); j++)
-                    particles.push_back(system.getVirtualSite(i).getParticle(j));
+                for (int j = 0; j < vsite.getNumParticles(); j++)
+                    particles.push_back(vsite.getParticle(j));
                siteParticles.push_back(particles);
                vector<double> weights;
-                if (dynamic_cast<const TwoParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
+                if (dynamic_cast<const TwoParticleAverageSite*>(&vsite) != NULL) {
                    // A two particle average.

-                    const TwoParticleAverageSite& site = dynamic_cast<const TwoParticleAverageSite&>(system.getVirtualSite(i));
+                    const TwoParticleAverageSite& site = dynamic_cast<const TwoParticleAverageSite&>(vsite);
                    weights.push_back(site.getWeight(0));
                    weights.push_back(site.getWeight(1));
                }
-                else if (dynamic_cast<const ThreeParticleAverageSite*>(&system.getVirtualSite(i)) != NULL) {
+                else if (dynamic_cast<const ThreeParticleAverageSite*>(&vsite) != NULL) {
                    // A three particle average.

-                    const ThreeParticleAverageSite& site = dynamic_cast<const ThreeParticleAverageSite&>(system.getVirtualSite(i));
+                    const ThreeParticleAverageSite& site = dynamic_cast<const ThreeParticleAverageSite&>(vsite);
                    weights.push_back(site.getWeight(0));
                    weights.push_back(site.getWeight(1));
                    weights.push_back(site.getWeight(2));
                }
-                else if (dynamic_cast<const OutOfPlaneSite*>(&system.getVirtualSite(i)) != NULL) {
+                else if (dynamic_cast<const OutOfPlaneSite*>(&vsite) != NULL) {
                    // An out of plane site.

-                    const OutOfPlaneSite& site = dynamic_cast<const OutOfPlaneSite&>(system.getVirtualSite(i));
+                    const OutOfPlaneSite& site = dynamic_cast<const OutOfPlaneSite&>(vsite);
                    weights.push_back(site.getWeight12());
                    weights.push_back(site.getWeight13());
                    weights.push_back(site.getWeightCross());

--- a/platforms/opencl/src/OpenCLIntegrationUtilities.cpp
+++ b/platforms/opencl/src/OpenCLIntegrationUtilities.cpp
@@ -395,12 +395,12 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
        vector<cl_int> atomConstraintsVec(ccmaAtomConstraints.getSize());
        vector<cl_int> numAtomConstraintsVec(ccmaNumAtomConstraints.getSize());
        vector<cl_int> constraintMatrixColumnVec(ccmaConstraintMatrixColumn.getSize());
-        if (context.getUseDoublePrecision() || context.getUseMixedPrecision()) {
-            ccmaDistance.initialize<mm_double4>(context, numCCMA, "CcmaDistance");
-            ccmaDelta1.initialize<cl_double>(context, numCCMA, "CcmaDelta1");
-            ccmaDelta2.initialize<cl_double>(context, numCCMA, "CcmaDelta2");
-            ccmaReducedMass.initialize<cl_double>(context, numCCMA, "CcmaReducedMass");
-            ccmaConstraintMatrixValue.initialize<cl_double>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
+        int elementSize = (context.getUseDoublePrecision() || context.getUseMixedPrecision() ? sizeof(cl_double) : sizeof(cl_float));
+        ccmaDistance.initialize(context, numCCMA, 4*elementSize, "CcmaDistance");
+        ccmaDelta1.initialize(context, numCCMA, elementSize, "CcmaDelta1");
+        ccmaDelta2.initialize(context, numCCMA, elementSize, "CcmaDelta2");
+        ccmaReducedMass.initialize(context, numCCMA, elementSize, "CcmaReducedMass");
+        ccmaConstraintMatrixValue.initialize(context, numCCMA*maxRowElements, elementSize, "ConstraintMatrixValue");
        vector<mm_double4> distanceVec(ccmaDistance.getSize());
        vector<cl_double> reducedMassVec(ccmaReducedMass.getSize());
        vector<cl_double> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
@@ -424,43 +424,9 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
                atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
            }
        }
-            ccmaDistance.upload(distanceVec);
-            ccmaReducedMass.upload(reducedMassVec);
-            ccmaConstraintMatrixValue.upload(constraintMatrixValueVec);
-        }
-        else {
-            ccmaDistance.initialize<mm_float4>(context, numCCMA, "CcmaDistance");
-            ccmaDelta1.initialize<cl_float>(context, numCCMA, "CcmaDelta1");
-            ccmaDelta2.initialize<cl_float>(context, numCCMA, "CcmaDelta2");
-            ccmaReducedMass.initialize<cl_float>(context, numCCMA, "CcmaReducedMass");
-            ccmaConstraintMatrixValue.initialize<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
-            vector<mm_float4> distanceVec(ccmaDistance.getSize());
-            vector<cl_float> reducedMassVec(ccmaReducedMass.getSize());
-            vector<cl_float> constraintMatrixValueVec(ccmaConstraintMatrixValue.getSize());
-            for (int i = 0; i < numCCMA; i++) {
-                int index = constraintOrder[i];
-                int c = ccmaConstraints[index];
-                atomsVec[i].x = atom1[c];
-                atomsVec[i].y = atom2[c];
-                distanceVec[i].w = (float) distance[c];
-                reducedMassVec[i] = (float) (0.5/(1.0/system.getParticleMass(atom1[c])+1.0/system.getParticleMass(atom2[c])));
-                for (unsigned int j = 0; j < matrix[index].size(); j++) {
-                    constraintMatrixColumnVec[i+j*numCCMA] = matrix[index][j].first;
-                    constraintMatrixValueVec[i+j*numCCMA] = (float) matrix[index][j].second;
-                }
-                constraintMatrixColumnVec[i+matrix[index].size()*numCCMA] = numCCMA;
-            }
-            for (unsigned int i = 0; i < atomConstraints.size(); i++) {
-                numAtomConstraintsVec[i] = atomConstraints[i].size();
-                for (unsigned int j = 0; j < atomConstraints[i].size(); j++) {
-                    bool forward = (atom1[ccmaConstraints[atomConstraints[i][j]]] == i);
-                    atomConstraintsVec[i+j*numAtoms] = (forward ? inverseOrder[atomConstraints[i][j]]+1 : -inverseOrder[atomConstraints[i][j]]-1);
-                }
-            }
-            ccmaDistance.upload(distanceVec);
-            ccmaReducedMass.upload(reducedMassVec);
-            ccmaConstraintMatrixValue.upload(constraintMatrixValueVec);
-        }
+        ccmaDistance.upload(distanceVec, true, true);
+        ccmaReducedMass.upload(reducedMassVec, true, true);
+        ccmaConstraintMatrixValue.upload(constraintMatrixValueVec, true, true);
        ccmaAtoms.upload(atomsVec);
        ccmaAtomConstraints.upload(atomConstraintsVec);
        ccmaNumAtomConstraints.upload(numAtomConstraintsVec);
@@ -563,57 +529,21 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
        vsiteLocalCoordsAtoms.upload(vsiteLocalCoordsAtomVec);
        vsiteLocalCoordsStartIndex.upload(vsiteLocalCoordsStartVec);
    }
-    if (context.getUseDoublePrecision()) {
-        vsite2AvgWeights.initialize<mm_double2>(context, max(1, num2Avg), "vsite2AvgWeights");
-        vsite3AvgWeights.initialize<mm_double4>(context, max(1, num3Avg), "vsite3AvgWeights");
-        vsiteOutOfPlaneWeights.initialize<mm_double4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
-        vsiteLocalCoordsWeights.initialize<cl_double>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
-        vsiteLocalCoordsPos.initialize<mm_double4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
+    int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
+    vsite2AvgWeights.initialize(context, max(1, num2Avg), 2*elementSize, "vsite2AvgWeights");
+    vsite3AvgWeights.initialize(context, max(1, num3Avg), 4*elementSize, "vsite3AvgWeights");
+    vsiteOutOfPlaneWeights.initialize(context, max(1, numOutOfPlane), 4*elementSize, "vsiteOutOfPlaneWeights");
+    vsiteLocalCoordsWeights.initialize(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), elementSize, "vsiteLocalCoordsWeights");
+    vsiteLocalCoordsPos.initialize(context, max(1, (int) vsiteLocalCoordsPosVec.size()), 4*elementSize, "vsiteLocalCoordsPos");
    if (num2Avg > 0)
-            vsite2AvgWeights.upload(vsite2AvgWeightVec);
+        vsite2AvgWeights.upload(vsite2AvgWeightVec, true, true);
    if (num3Avg > 0)
-            vsite3AvgWeights.upload(vsite3AvgWeightVec);
+        vsite3AvgWeights.upload(vsite3AvgWeightVec, true, true);
    if (numOutOfPlane > 0)
-            vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec);
+        vsiteOutOfPlaneWeights.upload(vsiteOutOfPlaneWeightVec, true, true);
    if (numLocalCoords > 0) {
-            vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec);
-            vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec);
-        }
-    }
-    else {
-        vsite2AvgWeights.initialize<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights");
-        vsite3AvgWeights.initialize<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights");
-        vsiteOutOfPlaneWeights.initialize<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
-        vsiteLocalCoordsWeights.initialize<cl_float>(context, max(1, (int) vsiteLocalCoordsWeightVec.size()), "vsiteLocalCoordsWeights");
-        vsiteLocalCoordsPos.initialize<mm_float4>(context, max(1, (int) vsiteLocalCoordsPosVec.size()), "vsiteLocalCoordsPos");
-        if (num2Avg > 0) {
-            vector<mm_float2> floatWeights(num2Avg);
-            for (int i = 0; i < num2Avg; i++)
-                floatWeights[i] = mm_float2((float) vsite2AvgWeightVec[i].x, (float) vsite2AvgWeightVec[i].y);
-            vsite2AvgWeights.upload(floatWeights);
-        }
-        if (num3Avg > 0) {
-            vector<mm_float4> floatWeights(num3Avg);
-            for (int i = 0; i < num3Avg; i++)
-                floatWeights[i] = mm_float4((float) vsite3AvgWeightVec[i].x, (float) vsite3AvgWeightVec[i].y, (float) vsite3AvgWeightVec[i].z, 0.0f);
-            vsite3AvgWeights.upload(floatWeights);
-        }
-        if (numOutOfPlane > 0) {
-            vector<mm_float4> floatWeights(numOutOfPlane);
-            for (int i = 0; i < numOutOfPlane; i++)
-                floatWeights[i] = mm_float4((float) vsiteOutOfPlaneWeightVec[i].x, (float) vsiteOutOfPlaneWeightVec[i].y, (float) vsiteOutOfPlaneWeightVec[i].z, 0.0f);
-            vsiteOutOfPlaneWeights.upload(floatWeights);
-        }
-        if (numLocalCoords > 0) {
-            vector<cl_float> floatWeights(vsiteLocalCoordsWeightVec.size());
-            for (int i = 0; i < (int) vsiteLocalCoordsWeightVec.size(); i++)
-                floatWeights[i] = (cl_float) vsiteLocalCoordsWeightVec[i];
-            vsiteLocalCoordsWeights.upload(floatWeights);
-            vector<mm_float4> floatPos(vsiteLocalCoordsPosVec.size());
-            for (int i = 0; i < (int) vsiteLocalCoordsPosVec.size(); i++)
-                floatPos[i] = mm_float4((float) vsiteLocalCoordsPosVec[i].x, (float) vsiteLocalCoordsPosVec[i].y, (float) vsiteLocalCoordsPosVec[i].z, 0.0f);
-            vsiteLocalCoordsPos.upload(floatPos);
-        }
+        vsiteLocalCoordsWeights.upload(vsiteLocalCoordsWeightVec, true, true);
+        vsiteLocalCoordsPos.upload(vsiteLocalCoordsPosVec, true, true);
    }
    
    // If multiple virtual sites depend on the same particle, make sure the force distribution