Continuing to convert CudaArrays.

f15f591b · Peter Eastman · b8c86406 · f15f591b · f15f591b · f15f591b
Commit f15f591b authored Feb 14, 2018 by Peter Eastman
3 changed files
--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -265,9 +265,8 @@ private:
 class CudaCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
 public:
    CudaCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcHarmonicBondForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system) {
    }
-    ~CudaCalcHarmonicBondForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -298,7 +297,7 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    const System& system;
-    CudaArray* params;
+    CudaArray params;
 };

 /**
@@ -307,7 +306,7 @@ private:
 class CudaCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
 public:
    CudaCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomBondForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
    }
    ~CudaCalcCustomBondForceKernel();
    /**
@@ -341,7 +340,7 @@ private:
    ForceInfo* info;
    const System& system;
    CudaParameterSet* params;
-    CudaArray* globals;
+    CudaArray globals;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
 };
@@ -352,9 +351,8 @@ private:
 class CudaCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
 public:
    CudaCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcHarmonicAngleForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system) {
    }
-    ~CudaCalcHarmonicAngleForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -385,7 +383,7 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    const System& system;
-    CudaArray* params;
+    CudaArray params;
 };

 /**
@@ -394,7 +392,7 @@ private:
 class CudaCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
 public:
    CudaCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomAngleForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
    }
    ~CudaCalcCustomAngleForceKernel();
    /**
@@ -428,7 +426,7 @@ private:
    ForceInfo* info;
    const System& system;
    CudaParameterSet* params;
-    CudaArray* globals;
+    CudaArray globals;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
 };
@@ -439,9 +437,8 @@ private:
 class CudaCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
 public:
    CudaCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcPeriodicTorsionForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system) {
    }
-    ~CudaCalcPeriodicTorsionForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -472,7 +469,7 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    const System& system;
-    CudaArray* params;
+    CudaArray params;
 };

 /**
@@ -481,9 +478,8 @@ private:
 class CudaCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
 public:
    CudaCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcRBTorsionForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), params1(NULL), params2(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system) {
    }
-    ~CudaCalcRBTorsionForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -514,8 +510,8 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    const System& system;
-    CudaArray* params1;
-    CudaArray* params2;
+    CudaArray params1;
+    CudaArray params2;
 };

 /**
@@ -524,9 +520,8 @@ private:
 class CudaCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
 public:
    CudaCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCMAPTorsionForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), coefficients(NULL), mapPositions(NULL), torsionMaps(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system) {
    }
-    ~CudaCalcCMAPTorsionForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -558,9 +553,9 @@ private:
    ForceInfo* info;
    const System& system;
    std::vector<int2> mapPositionsVec;
-    CudaArray* coefficients;
-    CudaArray* mapPositions;
-    CudaArray* torsionMaps;
+    CudaArray coefficients;
+    CudaArray mapPositions;
+    CudaArray torsionMaps;
 };

 /**
@@ -569,7 +564,7 @@ private:
 class CudaCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
 public:
    CudaCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomTorsionForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
    }
    ~CudaCalcCustomTorsionForceKernel();
    /**
@@ -603,7 +598,7 @@ private:
    ForceInfo* info;
    const System& system;
    CudaParameterSet* params;
-    CudaArray* globals;
+    CudaArray globals;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
 };
@@ -614,9 +609,7 @@ private:
 class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
 public:
    CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
-            cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
-            pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL), pmeDispersionBsplineModuliY(NULL),
-            pmeDispersionBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
+            cu(cu), hasInitializedFFT(false), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
    }
    ~CudaCalcNonbondedForceKernel();
    /**
@@ -682,20 +675,20 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    bool hasInitializedFFT;
-    CudaArray* sigmaEpsilon;
-    CudaArray* exceptionParams;
-    CudaArray* cosSinSums;
-    CudaArray* directPmeGrid;
-    CudaArray* reciprocalPmeGrid;
-    CudaArray* pmeBsplineModuliX;
-    CudaArray* pmeBsplineModuliY;
-    CudaArray* pmeBsplineModuliZ;
-    CudaArray* pmeDispersionBsplineModuliX;
-    CudaArray* pmeDispersionBsplineModuliY;
-    CudaArray* pmeDispersionBsplineModuliZ;
-    CudaArray* pmeAtomRange;
-    CudaArray* pmeAtomGridIndex;
-    CudaArray* pmeEnergyBuffer;
+    CudaArray sigmaEpsilon;
+    CudaArray exceptionParams;
+    CudaArray cosSinSums;
+    CudaArray directPmeGrid;
+    CudaArray reciprocalPmeGrid;
+    CudaArray pmeBsplineModuliX;
+    CudaArray pmeBsplineModuliY;
+    CudaArray pmeBsplineModuliZ;
+    CudaArray pmeDispersionBsplineModuliX;
+    CudaArray pmeDispersionBsplineModuliY;
+    CudaArray pmeDispersionBsplineModuliZ;
+    CudaArray pmeAtomRange;
+    CudaArray pmeAtomGridIndex;
+    CudaArray pmeEnergyBuffer;
    CudaSort* sort;
    Kernel cpuPme;
    PmeIO* pmeio;
@@ -737,7 +730,7 @@ private:
 class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
 public:
    CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomNonbondedForceKernel(name, platform),
-            cu(cu), params(NULL), globals(NULL), interactionGroupData(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) {
+            cu(cu), params(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) {
    }
    ~CudaCalcCustomNonbondedForceKernel();
    /**
@@ -769,13 +762,13 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    CudaParameterSet* params;
-    CudaArray* globals;
-    CudaArray* interactionGroupData;
+    CudaArray globals;
+    CudaArray interactionGroupData;
    CUfunction interactionGroupKernel;
    std::vector<void*> interactionGroupArgs;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
-    std::vector<CudaArray*> tabulatedFunctions;
+    std::vector<CudaArray> tabulatedFunctions;
    double longRangeCoefficient;
    std::vector<double> longRangeCoefficientDerivs;
    bool hasInitializedLongRangeCorrection, hasInitializedKernel, hasParamDerivs;
@@ -790,9 +783,8 @@ private:
 class CudaCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
 public:
    CudaCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGBSAOBCForceKernel(name, platform), cu(cu),
-            hasCreatedKernels(false), params(NULL), bornSum(NULL), bornRadii(NULL), bornForce(NULL), obcChain(NULL) {
+            hasCreatedKernels(false) {
    }
-    ~CudaCalcGBSAOBCForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -823,11 +815,11 @@ private:
    int maxTiles;
    CudaContext& cu;
    ForceInfo* info;
-    CudaArray* params;
-    CudaArray* bornSum;
-    CudaArray* bornRadii;
-    CudaArray* bornForce;
-    CudaArray* obcChain;
+    CudaArray params;
+    CudaArray bornSum;
+    CudaArray bornRadii;
+    CudaArray bornForce;
+    CudaArray obcChain;
    CUfunction computeBornSumKernel;
    CUfunction reduceBornSumKernel;
    CUfunction force1Kernel;
@@ -841,8 +833,7 @@ private:
 class CudaCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
 public:
    CudaCalcCustomGBForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomGBForceKernel(name, platform),
-            hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), longEnergyDerivs(NULL), globals(NULL),
-            valueBuffers(NULL), system(system) {
+            hasInitializedKernels(false), cu(cu), params(NULL), computedValues(NULL), energyDerivs(NULL), energyDerivChain(NULL), system(system) {
    }
    ~CudaCalcCustomGBForceKernel();
    /**
@@ -880,13 +871,13 @@ private:
    CudaParameterSet* energyDerivs;
    CudaParameterSet* energyDerivChain;
    std::vector<CudaParameterSet*> dValuedParam;
-    std::vector<CudaArray*> dValue0dParam;
-    CudaArray* longEnergyDerivs;
-    CudaArray* globals;
-    CudaArray* valueBuffers;
+    std::vector<CudaArray> dValue0dParam;
+    CudaArray longEnergyDerivs;
+    CudaArray globals;
+    CudaArray valueBuffers;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
-    std::vector<CudaArray*> tabulatedFunctions;
+    std::vector<CudaArray> tabulatedFunctions;
    std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue;
    const System& system;
    CUfunction pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
@@ -901,7 +892,7 @@ private:
 class CudaCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
 public:
    CudaCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomExternalForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), system(system), params(NULL), globals(NULL) {
+            hasInitializedKernel(false), cu(cu), system(system), params(NULL) {
    }
    ~CudaCalcCustomExternalForceKernel();
    /**
@@ -935,7 +926,7 @@ private:
    ForceInfo* info;
    const System& system;
    CudaParameterSet* params;
-    CudaArray* globals;
+    CudaArray globals;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
 };
@@ -946,8 +937,7 @@ private:
 class CudaCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
 public:
    CudaCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomHbondForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), donors(NULL), acceptors(NULL),
-            globals(NULL), donorExclusions(NULL), acceptorExclusions(NULL), system(system) {
+            hasInitializedKernel(false), cu(cu), donorParams(NULL), acceptorParams(NULL), system(system) {
    }
    ~CudaCalcCustomHbondForceKernel();
    /**
@@ -981,14 +971,14 @@ private:
    ForceInfo* info;
    CudaParameterSet* donorParams;
    CudaParameterSet* acceptorParams;
-    CudaArray* globals;
-    CudaArray* donors;
-    CudaArray* acceptors;
-    CudaArray* donorExclusions;
-    CudaArray* acceptorExclusions;
+    CudaArray globals;
+    CudaArray donors;
+    CudaArray acceptors;
+    CudaArray donorExclusions;
+    CudaArray acceptorExclusions;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
-    std::vector<CudaArray*> tabulatedFunctions;
+    std::vector<CudaArray> tabulatedFunctions;
    std::vector<void*> donorArgs, acceptorArgs;
    const System& system;
    CUfunction donorKernel, acceptorKernel;
@@ -1000,7 +990,7 @@ private:
 class CudaCalcCustomCentroidBondForceKernel : public CalcCustomCentroidBondForceKernel {
 public:
    CudaCalcCustomCentroidBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCentroidBondForceKernel(name, platform),
-            cu(cu), params(NULL), globals(NULL), groupParticles(NULL), groupWeights(NULL), groupOffsets(NULL), groupForces(NULL), bondGroups(NULL), centerPositions(NULL), system(system) {
+            cu(cu), params(NULL), system(system) {
    }
    ~CudaCalcCustomCentroidBondForceKernel();
    /**
@@ -1034,16 +1024,16 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    CudaParameterSet* params;
-    CudaArray* globals;
-    CudaArray* groupParticles;
-    CudaArray* groupWeights;
-    CudaArray* groupOffsets;
-    CudaArray* groupForces;
-    CudaArray* bondGroups;
-    CudaArray* centerPositions;
+    CudaArray globals;
+    CudaArray groupParticles;
+    CudaArray groupWeights;
+    CudaArray groupOffsets;
+    CudaArray groupForces;
+    CudaArray bondGroups;
+    CudaArray centerPositions;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
-    std::vector<CudaArray*> tabulatedFunctions;
+    std::vector<CudaArray> tabulatedFunctions;
    std::vector<void*> groupForcesArgs;
    CUfunction computeCentersKernel, groupForcesKernel, applyForcesKernel;
    const System& system;
@@ -1055,7 +1045,7 @@ private:
 class CudaCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
 public:
    CudaCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomCompoundBondForceKernel(name, platform),
-            cu(cu), params(NULL), globals(NULL), system(system) {
+            cu(cu), params(NULL), system(system) {
    }
    ~CudaCalcCustomCompoundBondForceKernel();
    /**
@@ -1088,10 +1078,10 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    CudaParameterSet* params;
-    CudaArray* globals;
+    CudaArray globals;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
-    std::vector<CudaArray*> tabulatedFunctions;
+    std::vector<CudaArray> tabulatedFunctions;
    const System& system;
 };

@@ -1101,9 +1091,7 @@ private:
 class CudaCalcCustomManyParticleForceKernel : public CalcCustomManyParticleForceKernel {
 public:
    CudaCalcCustomManyParticleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomManyParticleForceKernel(name, platform),
-            hasInitializedKernel(false), cu(cu), params(NULL), particleTypes(NULL), orderIndex(NULL), particleOrder(NULL), exclusions(NULL),
-            exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighborPairs(NULL), numNeighborPairs(NULL), neighborStartIndex(NULL),
-            numNeighborsForAtom(NULL), neighbors(NULL), system(system) {
+            hasInitializedKernel(false), cu(cu), params(NULL), system(system) {
    }
    ~CudaCalcCustomManyParticleForceKernel();
    /**
@@ -1138,21 +1126,21 @@ private:
    NonbondedMethod nonbondedMethod;
    int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
    CudaParameterSet* params;
-    CudaArray* particleTypes;
-    CudaArray* orderIndex;
-    CudaArray* particleOrder;
-    CudaArray* exclusions;
-    CudaArray* exclusionStartIndex;
-    CudaArray* blockCenter;
-    CudaArray* blockBoundingBox;
-    CudaArray* neighborPairs;
-    CudaArray* numNeighborPairs;
-    CudaArray* neighborStartIndex;
-    CudaArray* numNeighborsForAtom;
-    CudaArray* neighbors;
+    CudaArray particleTypes;
+    CudaArray orderIndex;
+    CudaArray particleOrder;
+    CudaArray exclusions;
+    CudaArray exclusionStartIndex;
+    CudaArray blockCenter;
+    CudaArray blockBoundingBox;
+    CudaArray neighborPairs;
+    CudaArray numNeighborPairs;
+    CudaArray neighborStartIndex;
+    CudaArray numNeighborsForAtom;
+    CudaArray neighbors;
    std::vector<std::string> globalParamNames;
    std::vector<float> globalParamValues;
-    std::vector<CudaArray*> tabulatedFunctions;
+    std::vector<CudaArray> tabulatedFunctions;
    std::vector<void*> forceArgs, blockBoundsArgs, neighborsArgs, startIndicesArgs, copyPairsArgs;
    const System& system;
    CUfunction forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel;

--- a/platforms/cuda/src/CudaBondedUtilities.cpp
+++ b/platforms/cuda/src/CudaBondedUtilities.cpp
@@ -84,8 +84,10 @@ void CudaBondedUtilities::initialize(const System& system) {
    for (int i = 0; i < numForces; i++) {
        int numBonds = forceAtoms[i].size();
        int numAtoms = forceAtoms[i][0].size();
+        int numArrays = (numAtoms+3)/4;
        int startAtom = 0;
-        while (startAtom < numAtoms) {
+        atomIndices[i].resize(numArrays);
+        for (int j = 0; j < numArrays; j++) {
            int width = min(numAtoms-startAtom, 4);
            int paddedWidth = (width == 3 ? 4 : width);
            vector<unsigned int> indexVec(paddedWidth*numBonds);
@@ -93,9 +95,8 @@ void CudaBondedUtilities::initialize(const System& system) {
                for (int atom = 0; atom < width; atom++)
                    indexVec[bond*paddedWidth+atom] = forceAtoms[i][bond][startAtom+atom];
            }
-            atomIndices[i].push_back(CudaArray());
-            atomIndices[i].back().initialize(context, numBonds, 4*paddedWidth, "bondedIndices");
-            atomIndices[i].back().upload(&indexVec[0]);
+            atomIndices[i][j].initialize(context, numBonds, 4*paddedWidth, "bondedIndices");
+            atomIndices[i][j].upload(&indexVec[0]);
            startAtom += width;
        }
    }

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2017 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -505,12 +505,6 @@ private:
    const HarmonicBondForce& force;
 };

-CudaCalcHarmonicBondForceKernel::~CudaCalcHarmonicBondForceKernel() {
-    cu.setAsCurrent();
-    if (params != NULL)
-        delete params;
-}
-
 void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const HarmonicBondForce& force) {
    cu.setAsCurrent();
    int numContexts = cu.getPlatformData().contexts.size();
@@ -520,18 +514,18 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
    if (numBonds == 0)
        return;
    vector<vector<int> > atoms(numBonds, vector<int>(2));
-    params = CudaArray::create<float2>(cu, numBonds, "bondParams");
+    params.initialize<float2>(cu, numBonds, "bondParams");
    vector<float2> paramVector(numBonds);
    for (int i = 0; i < numBonds; i++) {
        double length, k;
        force.getBondParameters(startIndex+i, atoms[i][0], atoms[i][1], length, k);
        paramVector[i] = make_float2((float) length, (float) k);
    }
-    params->upload(paramVector);
+    params.upload(paramVector);
    map<string, string> replacements;
    replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
    replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
-    replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
+    replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float2");
    cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
    info = new ForceInfo(force);
    cu.addForce(info);
@@ -560,7 +554,7 @@ void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& conte
        force.getBondParameters(startIndex+i, atom1, atom2, length, k);
        paramVector[i] = make_float2((float) length, (float) k);
    }
-    params->upload(paramVector);
+    params.upload(paramVector);
    
    // Mark that the current reordering may be invalid.
    
@@ -600,8 +594,6 @@ CudaCalcCustomBondForceKernel::~CudaCalcCustomBondForceKernel() {
    cu.setAsCurrent();
    if (params != NULL)
        delete params;
-    if (globals != NULL)
-        delete globals;
 }

 void CudaCalcCustomBondForceKernel::initialize(const System& system, const CustomBondForce& force) {
@@ -649,9 +641,9 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
        variables[name] = "bondParams"+params->getParameterSuffix(i);
    }
    if (force.getNumGlobalParameters() > 0) {
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
-        globals->upload(globalParamValues);
-        string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customBondGlobals");
+        globals.upload(globalParamValues);
+        string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
        for (int i = 0; i < force.getNumGlobalParameters(); i++) {
            const string& name = force.getGlobalParameterName(i);
            string value = argName+"["+cu.intToString(i)+"]";
@@ -680,7 +672,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
 }

 double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -689,7 +681,7 @@ double CudaCalcCustomBondForceKernel::execute(ContextImpl& context, bool include
            globalParamValues[i] = value;
        }
        if (changed)
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
    }
    return 0.0;
 }
@@ -749,12 +741,6 @@ private:
    const HarmonicAngleForce& force;
 };

-CudaCalcHarmonicAngleForceKernel::~CudaCalcHarmonicAngleForceKernel() {
-    cu.setAsCurrent();
-    if (params != NULL)
-        delete params;
-}
-
 void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const HarmonicAngleForce& force) {
    cu.setAsCurrent();
    int numContexts = cu.getPlatformData().contexts.size();
@@ -764,7 +750,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
    if (numAngles == 0)
        return;
    vector<vector<int> > atoms(numAngles, vector<int>(3));
-    params = CudaArray::create<float2>(cu, numAngles, "angleParams");
+    params.initialize<float2>(cu, numAngles, "angleParams");
    vector<float2> paramVector(numAngles);
    for (int i = 0; i < numAngles; i++) {
        double angle, k;
@@ -772,11 +758,11 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
        paramVector[i] = make_float2((float) angle, (float) k);

    }
-    params->upload(paramVector);
+    params.upload(paramVector);
    map<string, string> replacements;
    replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
    replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
-    replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
+    replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float2");
    cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
    info = new ForceInfo(force);
    cu.addForce(info);
@@ -805,7 +791,7 @@ void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& cont
        force.getAngleParameters(startIndex+i, atom1, atom2, atom3, angle, k);
        paramVector[i] = make_float2((float) angle, (float) k);
    }
-    params->upload(paramVector);
+    params.upload(paramVector);
    
    // Mark that the current reordering may be invalid.
    
@@ -846,8 +832,6 @@ CudaCalcCustomAngleForceKernel::~CudaCalcCustomAngleForceKernel() {
    cu.setAsCurrent();
    if (params != NULL)
        delete params;
-    if (globals != NULL)
-        delete globals;
 }

 void CudaCalcCustomAngleForceKernel::initialize(const System& system, const CustomAngleForce& force) {
@@ -895,9 +879,9 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
        variables[name] = "angleParams"+params->getParameterSuffix(i);
    }
    if (force.getNumGlobalParameters() > 0) {
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
-        globals->upload(globalParamValues);
-        string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customAngleGlobals");
+        globals.upload(globalParamValues);
+        string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
        for (int i = 0; i < force.getNumGlobalParameters(); i++) {
            const string& name = force.getGlobalParameterName(i);
            string value = argName+"["+cu.intToString(i)+"]";
@@ -926,7 +910,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
 }

 double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -935,7 +919,7 @@ double CudaCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includ
            globalParamValues[i] = value;
        }
        if (changed)
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
    }
    return 0.0;
 }
@@ -996,12 +980,6 @@ private:
    const PeriodicTorsionForce& force;
 };

-CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
-    cu.setAsCurrent();
-    if (params != NULL)
-        delete params;
-}
-
 void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const PeriodicTorsionForce& force) {
    cu.setAsCurrent();
    int numContexts = cu.getPlatformData().contexts.size();
@@ -1011,7 +989,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
    if (numTorsions == 0)
        return;
    vector<vector<int> > atoms(numTorsions, vector<int>(4));
-    params = CudaArray::create<float4>(cu, numTorsions, "periodicTorsionParams");
+    params.initialize<float4>(cu, numTorsions, "periodicTorsionParams");
    vector<float4> paramVector(numTorsions);
    for (int i = 0; i < numTorsions; i++) {
        int periodicity;
@@ -1019,11 +997,11 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
        force.getTorsionParameters(startIndex+i, atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], periodicity, phase, k);
        paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
    }
-    params->upload(paramVector);
+    params.upload(paramVector);
    map<string, string> replacements;
    replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
    replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
-    replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float4");
+    replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params.getDevicePointer(), "float4");
    cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
    info = new ForceInfo(force);
    cu.addForce(info);
@@ -1052,7 +1030,7 @@ void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& co
        force.getTorsionParameters(startIndex+i, atom1, atom2, atom3, atom4, periodicity, phase, k);
        paramVector[i] = make_float4((float) k, (float) phase, (float) periodicity, 0.0f);
    }
-    params->upload(paramVector);
+    params.upload(paramVector);
    
    // Mark that the current reordering may be invalid.
    
@@ -1087,14 +1065,6 @@ private:
    const RBTorsionForce& force;
 };

-CudaCalcRBTorsionForceKernel::~CudaCalcRBTorsionForceKernel() {
-    cu.setAsCurrent();
-    if (params1 != NULL)
-        delete params1;
-    if (params2 != NULL)
-        delete params2;
-}
-
 void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsionForce& force) {
    cu.setAsCurrent();
    int numContexts = cu.getPlatformData().contexts.size();
@@ -1104,8 +1074,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
    if (numTorsions == 0)
        return;
    vector<vector<int> > atoms(numTorsions, vector<int>(4));
-    params1 = CudaArray::create<float4>(cu, numTorsions, "rbTorsionParams1");
-    params2 = CudaArray::create<float2>(cu, numTorsions, "rbTorsionParams2");
+    params1.initialize<float4>(cu, numTorsions, "rbTorsionParams1");
+    params2.initialize<float2>(cu, numTorsions, "rbTorsionParams2");
    vector<float4> paramVector1(numTorsions);
    vector<float2> paramVector2(numTorsions);
    for (int i = 0; i < numTorsions; i++) {
@@ -1115,13 +1085,13 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
        paramVector2[i] = make_float2((float) c4, (float) c5);

    }
-    params1->upload(paramVector1);
-    params2->upload(paramVector2);
+    params1.upload(paramVector1);
+    params2.upload(paramVector2);
    map<string, string> replacements;
    replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
    replacements["COMPUTE_FORCE"] = CudaKernelSources::rbTorsionForce;
-    replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1->getDevicePointer(), "float4");
-    replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2->getDevicePointer(), "float2");
+    replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1.getDevicePointer(), "float4");
+    replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2.getDevicePointer(), "float2");
    cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
    info = new ForceInfo(force);
    cu.addForce(info);
@@ -1152,8 +1122,8 @@ void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
        paramVector1[i] = make_float4((float) c0, (float) c1, (float) c2, (float) c3);
        paramVector2[i] = make_float2((float) c4, (float) c5);
    }
-    params1->upload(paramVector1);
-    params2->upload(paramVector2);
+    params1.upload(paramVector1);
+    params2.upload(paramVector2);
    
    // Mark that the current reordering may be invalid.
    
@@ -1190,15 +1160,6 @@ private:
    const CMAPTorsionForce& force;
 };

-CudaCalcCMAPTorsionForceKernel::~CudaCalcCMAPTorsionForceKernel() {
-    if (coefficients != NULL)
-        delete coefficients;
-    if (mapPositions != NULL)
-        delete mapPositions;
-    if (torsionMaps != NULL)
-        delete torsionMaps;
-}
-
 void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAPTorsionForce& force) {
    cu.setAsCurrent();
    int numContexts = cu.getPlatformData().contexts.size();
@@ -1230,17 +1191,17 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
    vector<int> torsionMapsVec(numTorsions);
    for (int i = 0; i < numTorsions; i++)
        force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
-    coefficients = CudaArray::create<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
-    mapPositions = CudaArray::create<int2>(cu, numMaps, "cmapTorsionMapPositions");
-    torsionMaps = CudaArray::create<int>(cu, numTorsions, "cmapTorsionMaps");
-    coefficients->upload(coeffVec);
-    mapPositions->upload(mapPositionsVec);
-    torsionMaps->upload(torsionMapsVec);
+    coefficients.initialize<float4>(cu, coeffVec.size(), "cmapTorsionCoefficients");
+    mapPositions.initialize<int2>(cu, numMaps, "cmapTorsionMapPositions");
+    torsionMaps.initialize<int>(cu, numTorsions, "cmapTorsionMaps");
+    coefficients.upload(coeffVec);
+    mapPositions.upload(mapPositionsVec);
+    torsionMaps.upload(torsionMapsVec);
    map<string, string> replacements;
    replacements["APPLY_PERIODIC"] = (force.usesPeriodicBoundaryConditions() ? "1" : "0");
-    replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients->getDevicePointer(), "float4");
-    replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions->getDevicePointer(), "int2");
-    replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps->getDevicePointer(), "int");
+    replacements["COEFF"] = cu.getBondedUtilities().addArgument(coefficients.getDevicePointer(), "float4");
+    replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions.getDevicePointer(), "int2");
+    replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps.getDevicePointer(), "int");
    cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
    info = new ForceInfo(force);
    cu.addForce(info);
@@ -1256,9 +1217,9 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
    int startIndex = cu.getContextIndex()*force.getNumTorsions()/numContexts;
    int endIndex = (cu.getContextIndex()+1)*force.getNumTorsions()/numContexts;
    numTorsions = endIndex-startIndex;
-    if (mapPositions->getSize() != numMaps)
+    if (mapPositions.getSize() != numMaps)
        throw OpenMMException("updateParametersInContext: The number of maps has changed");
-    if (torsionMaps->getSize() != numTorsions)
+    if (torsionMaps.getSize() != numTorsions)
        throw OpenMMException("updateParametersInContext: The number of CMAP torsions has changed");

    // Update the maps.
@@ -1281,7 +1242,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
            coeffVec.push_back(make_float4((float) c[j][12], (float) c[j][13], (float) c[j][14], (float) c[j][15]));
        }
    }
-    coefficients->upload(coeffVec);
+    coefficients.upload(coeffVec);

    // Update the indices.

@@ -1290,7 +1251,7 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
        int index[8];
        force.getTorsionParameters(i, torsionMapsVec[i], index[0], index[1], index[2], index[3], index[4], index[5], index[6], index[7]);
    }
-    torsionMaps->upload(torsionMapsVec);
+    torsionMaps.upload(torsionMapsVec);
 }

 class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
@@ -1327,8 +1288,6 @@ private:
 CudaCalcCustomTorsionForceKernel::~CudaCalcCustomTorsionForceKernel() {
    if (params != NULL)
        delete params;
-    if (globals != NULL)
-        delete globals;
 }

 void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const CustomTorsionForce& force) {
@@ -1376,9 +1335,9 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
        variables[name] = "torsionParams"+params->getParameterSuffix(i);
    }
    if (force.getNumGlobalParameters() > 0) {
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
-        globals->upload(globalParamValues);
-        string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customTorsionGlobals");
+        globals.upload(globalParamValues);
+        string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
        for (int i = 0; i < force.getNumGlobalParameters(); i++) {
            const string& name = force.getGlobalParameterName(i);
            string value = argName+"["+cu.intToString(i)+"]";
@@ -1407,7 +1366,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
 }

 double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -1416,7 +1375,7 @@ double CudaCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool incl
            globalParamValues[i] = value;
        }
        if (changed)
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
    }
    return 0.0;
 }
@@ -1576,34 +1535,6 @@ private:

 CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
    cu.setAsCurrent();
-    if (sigmaEpsilon != NULL)
-        delete sigmaEpsilon;
-    if (exceptionParams != NULL)
-        delete exceptionParams;
-    if (cosSinSums != NULL)
-        delete cosSinSums;
-    if (directPmeGrid != NULL)
-        delete directPmeGrid;
-    if (reciprocalPmeGrid != NULL)
-        delete reciprocalPmeGrid;
-    if (pmeBsplineModuliX != NULL)
-        delete pmeBsplineModuliX;
-    if (pmeBsplineModuliY != NULL)
-        delete pmeBsplineModuliY;
-    if (pmeBsplineModuliZ != NULL)
-        delete pmeBsplineModuliZ;
-    if (pmeDispersionBsplineModuliX != NULL)
-        delete pmeDispersionBsplineModuliX;
-    if (pmeDispersionBsplineModuliY != NULL)
-        delete pmeDispersionBsplineModuliY;
-    if (pmeDispersionBsplineModuliZ != NULL)
-        delete pmeDispersionBsplineModuliZ;
-    if (pmeAtomRange != NULL)
-        delete pmeAtomRange;
-    if (pmeAtomGridIndex != NULL)
-        delete pmeAtomGridIndex;
-    if (pmeEnergyBuffer != NULL)
-        delete pmeEnergyBuffer;
    if (sort != NULL)
        delete sort;
    if (fft != NULL)
@@ -1647,7 +1578,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
    // Initialize nonbonded interactions.

    int numParticles = force.getNumParticles();
-    sigmaEpsilon = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
+    sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
    CudaArray& posq = cu.getPosq();
    vector<double4> temp(posq.getSize());
    float4* posqf = (float4*) &temp[0];
@@ -1682,7 +1613,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
        exclusionList[exclusion.second].push_back(exclusion.first);
    }
    posq.upload(&temp[0]);
-    sigmaEpsilon->upload(sigmaEpsilonVector);
+    sigmaEpsilon.upload(sigmaEpsilonVector);
    nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
    bool useCutoff = (nonbondedMethod != NoCutoff);
    bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
@@ -1740,7 +1671,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
            ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
            ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
            int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
-            cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
+            cosSinSums.initialize(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
        }
    }
    else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
@@ -1844,22 +1775,22 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                int gridElements = gridSizeX*gridSizeY*gridSizeZ;
                if (doLJPME)
                    gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
-                directPmeGrid = new CudaArray(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
-                reciprocalPmeGrid = new CudaArray(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
-                cu.addAutoclearBuffer(*directPmeGrid);
-                pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
-                pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
-                pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
+                directPmeGrid.initialize(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
+                reciprocalPmeGrid.initialize(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
+                cu.addAutoclearBuffer(directPmeGrid);
+                pmeBsplineModuliX.initialize(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
+                pmeBsplineModuliY.initialize(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
+                pmeBsplineModuliZ.initialize(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
                if (doLJPME) {
-                    pmeDispersionBsplineModuliX = new CudaArray(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
-                    pmeDispersionBsplineModuliY = new CudaArray(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
-                    pmeDispersionBsplineModuliZ = new CudaArray(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
+                    pmeDispersionBsplineModuliX.initialize(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
+                    pmeDispersionBsplineModuliY.initialize(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
+                    pmeDispersionBsplineModuliZ.initialize(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
                }
-                pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
-                pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
+                pmeAtomRange.initialize<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
+                pmeAtomGridIndex.initialize<int2>(cu, numParticles, "pmeAtomGridIndex");
                int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
-                pmeEnergyBuffer = new CudaArray(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
-                cu.clearBuffer(*pmeEnergyBuffer);
+                pmeEnergyBuffer.initialize(cu, cu.getNumThreadBlocks()*CudaContext::ThreadBlockSize, energyElementSize, "pmeEnergyBuffer");
+                cu.clearBuffer(pmeEnergyBuffer);
                sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
                int cufftVersion;
                cufftGetVersion(&cufftVersion);
@@ -1903,7 +1834,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                    if (recipForceGroup < 0)
                        recipForceGroup = force.getForceGroup();
                    cu.addPreComputation(new SyncStreamPreComputation(cu, pmeStream, pmeSyncEvent, recipForceGroup));
-                    cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), *pmeEnergyBuffer, recipForceGroup));
+                    cu.addPostComputation(new SyncStreamPostComputation(cu, pmeSyncEvent, cu.getKernel(module, "addEnergy"), pmeEnergyBuffer, recipForceGroup));
                }
                hasInitializedFFT = true;

@@ -1916,9 +1847,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                        xsize = gridSizeX;
                        ysize = gridSizeY;
                        zsize = gridSizeZ;
-                        xmoduli = pmeBsplineModuliX;
-                        ymoduli = pmeBsplineModuliY;
-                        zmoduli = pmeBsplineModuliZ;
+                        xmoduli = &pmeBsplineModuliX;
+                        ymoduli = &pmeBsplineModuliY;
+                        zmoduli = &pmeBsplineModuliZ;
                    }
                    else {
                        if (!doLJPME)
@@ -1926,9 +1857,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                        xsize = dispersionGridSizeX;
                        ysize = dispersionGridSizeY;
                        zsize = dispersionGridSizeZ;
-                        xmoduli = pmeDispersionBsplineModuliX;
-                        ymoduli = pmeDispersionBsplineModuliY;
-                        zmoduli = pmeDispersionBsplineModuliZ;
+                        xmoduli = &pmeDispersionBsplineModuliX;
+                        ymoduli = &pmeDispersionBsplineModuliY;
+                        zmoduli = &pmeDispersionBsplineModuliZ;
                    }
                    int maxSize = max(max(xsize, ysize), zsize);
                    vector<double> data(PmeOrder);
@@ -2008,7 +1939,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
    cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
    if (hasLJ)
        cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
-                                                sizeof(float2), sigmaEpsilon->getDevicePointer()));
+                                                sizeof(float2), sigmaEpsilon.getDevicePointer()));

    // Initialize the exceptions.

@@ -2019,7 +1950,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
    if (numExceptions > 0) {
        exceptionAtoms.resize(numExceptions);
        vector<vector<int> > atoms(numExceptions, vector<int>(2));
-        exceptionParams = CudaArray::create<float4>(cu, numExceptions, "exceptionParams");
+        exceptionParams.initialize<float4>(cu, numExceptions, "exceptionParams");
        vector<float4> exceptionParamsVector(numExceptions);
        for (int i = 0; i < numExceptions; i++) {
            double chargeProd, sigma, epsilon;
@@ -2027,9 +1958,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
            exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
            exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
        }
-        exceptionParams->upload(exceptionParamsVector);
+        exceptionParams.upload(exceptionParamsVector);
        map<string, string> replacements;
-        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams->getDevicePointer(), "float4");
+        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
        cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
    }
    info = new ForceInfo(force);
@@ -2037,13 +1968,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
 }

 double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
-    if (cosSinSums != NULL && includeReciprocal) {
-        void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums->getDevicePointer(), cu.getPeriodicBoxSizePointer()};
-        cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums->getSize());
-        void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums->getDevicePointer(), cu.getPeriodicBoxSizePointer()};
+    if (cosSinSums.isInitialized() && includeReciprocal) {
+        void* sumsArgs[] = {&cu.getEnergyBuffer().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
+        cu.executeKernel(ewaldSumsKernel, sumsArgs, cosSinSums.getSize());
+        void* forcesArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &cosSinSums.getDevicePointer(), cu.getPeriodicBoxSizePointer()};
        cu.executeKernel(ewaldForcesKernel, forcesArgs, cu.getNumAtoms());
    }
-    if (directPmeGrid != NULL && includeReciprocal) {
+    if (directPmeGrid.isInitialized()&& includeReciprocal) {
        if (usePmeStream)
            cu.setCurrentStream(pmeStream);

@@ -2075,118 +2006,118 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF

        // Execute the reciprocal space kernels.

-        void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
+        void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
                cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
                recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
        cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());

-        sort->sort(*pmeAtomGridIndex);
+        sort->sort(pmeAtomGridIndex);

-        void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
+        void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
                cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
-                recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
+                recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer()};
        cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);

        if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
-            void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
+            void* finishSpreadArgs[] = {&directPmeGrid.getDevicePointer()};
            cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
        }

        if (useCudaFFT) {
            if (cu.getUseDoublePrecision())
-                cufftExecD2Z(fftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer());
+                cufftExecD2Z(fftForward, (double*) directPmeGrid.getDevicePointer(), (double2*) reciprocalPmeGrid.getDevicePointer());
            else
-                cufftExecR2C(fftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer());
+                cufftExecR2C(fftForward, (float*) directPmeGrid.getDevicePointer(), (float2*) reciprocalPmeGrid.getDevicePointer());
        }
        else {
-            fft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true);
+            fft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
        }

        if (includeEnergy) {
-            void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &pmeEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
-                    &pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
+            void* computeEnergyArgs[] = {&reciprocalPmeGrid.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
+                    &pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
                    cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
            cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
        }

-        void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
-                &pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(), &pmeBsplineModuliZ->getDevicePointer(),
+        void* convolutionArgs[] = {&reciprocalPmeGrid.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
+                &pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
                cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
        cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);

        if (useCudaFFT) {
            if (cu.getUseDoublePrecision())
-                cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer());
+                cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid.getDevicePointer(), (double*) directPmeGrid.getDevicePointer());
            else
-                cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*)  directPmeGrid->getDevicePointer());
+                cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid.getDevicePointer(), (float*)  directPmeGrid.getDevicePointer());
        }
        else {
-            fft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
+            fft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
        }

-        void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
+        void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
                cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
-                recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
+                recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer()};
        cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);

        // As written, we check only the Electrostatic grid pointer to get here.  We could separate them out, but for
        // now we assume that LJPME can only be used if electrostatic PME is also active.
        if (doLJPME) {
-            void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
+            void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
                    cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
                    recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
            cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());

-            sort->sort(*pmeAtomGridIndex);
+            sort->sort(pmeAtomGridIndex);

-            cu.clearBuffer(*directPmeGrid);
-            void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
+            cu.clearBuffer(directPmeGrid);
+            void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
                    cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
-                    recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
-                    &sigmaEpsilon->getDevicePointer()};
+                    recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
+                    &sigmaEpsilon.getDevicePointer()};
            cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);

            if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
-                void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
+                void* finishSpreadArgs[] = {&directPmeGrid.getDevicePointer()};
                cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
            }

            if (useCudaFFT) {
                if (cu.getUseDoublePrecision())
-                    cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer());
+                    cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid.getDevicePointer(), (double2*) reciprocalPmeGrid.getDevicePointer());
                else
-                    cufftExecR2C(dispersionFftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer());
+                    cufftExecR2C(dispersionFftForward, (float*) directPmeGrid.getDevicePointer(), (float2*) reciprocalPmeGrid.getDevicePointer());
            }
            else {
-                dispersionFft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true);
+                dispersionFft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
            }

            if (includeEnergy) {
-                void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &pmeEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
-                        &pmeDispersionBsplineModuliX->getDevicePointer(), &pmeDispersionBsplineModuliY->getDevicePointer(), &pmeDispersionBsplineModuliZ->getDevicePointer(),
+                void* computeEnergyArgs[] = {&reciprocalPmeGrid.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
+                        &pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
                        cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
                cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
            }

-            void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
-                    &pmeDispersionBsplineModuliX->getDevicePointer(), &pmeDispersionBsplineModuliY->getDevicePointer(), &pmeDispersionBsplineModuliZ->getDevicePointer(),
+            void* convolutionArgs[] = {&reciprocalPmeGrid.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
+                    &pmeDispersionBsplineModuliX.getDevicePointer(), &pmeDispersionBsplineModuliY.getDevicePointer(), &pmeDispersionBsplineModuliZ.getDevicePointer(),
                    cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
            cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);

            if (useCudaFFT) {
                if (cu.getUseDoublePrecision())
-                    cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer());
+                    cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid.getDevicePointer(), (double*) directPmeGrid.getDevicePointer());
                else
-                    cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*)  directPmeGrid->getDevicePointer());
+                    cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid.getDevicePointer(), (float*)  directPmeGrid.getDevicePointer());
            }
            else {
-                dispersionFft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
+                dispersionFft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
            }

-            void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
+            void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
                    cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
-                    recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
-                    &sigmaEpsilon->getDevicePointer()};
+                    recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
+                    &sigmaEpsilon.getDevicePointer()};
            cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
        }
        if (usePmeStream) {
@@ -2253,7 +2184,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
        sumSquaredCharges += charge*charge;
    }
    cu.setCharges(chargeVector);
-    sigmaEpsilon->upload(sigmaEpsilonVector);
+    sigmaEpsilon.upload(sigmaEpsilonVector);
    
    // Record the exceptions.
    
@@ -2265,7 +2196,7 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
            force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
            exceptionParamsVector[i] = make_float4((float) (ONE_4PI_EPS0*chargeProd), (float) sigma, (float) (4.0*epsilon), 0.0f);
        }
-        exceptionParams->upload(exceptionParamsVector);
+        exceptionParams.upload(exceptionParamsVector);
    }
    
    // Compute other values.
@@ -2355,12 +2286,6 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
    cu.setAsCurrent();
    if (params != NULL)
        delete params;
-    if (globals != NULL)
-        delete globals;
-    if (interactionGroupData != NULL)
-        delete interactionGroupData;
-    for (auto function : tabulatedFunctions)
-        delete function;
    if (forceCopy != NULL)
        delete forceCopy;
 }
@@ -2377,7 +2302,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
    int numParticles = force.getNumParticles();
    params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
    if (force.getNumGlobalParameters() > 0)
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customNonbondedGlobals");
    vector<vector<float> > paramVector(numParticles);
    vector<vector<int> > exclusionList(numParticles);
    for (int i = 0; i < numParticles; i++) {
@@ -2402,6 +2327,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
    vector<pair<string, string> > functionDefinitions;
    vector<const TabulatedFunction*> functionList;
    vector<string> tableTypes;
+    tabulatedFunctions.resize(force.getNumTabulatedFunctions());
    for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
        functionList.push_back(&force.getTabulatedFunction(i));
        string name = force.getTabulatedFunctionName(i);
@@ -2410,9 +2336,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
        functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
        int width;
        vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
-        tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
-        tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
-        cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
+        tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
+        tabulatedFunctions[i].upload(f);
+        cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDevicePointer()));
        if (width == 1)
            tableTypes.push_back("float");
        else
@@ -2427,8 +2353,8 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
-    if (globals != NULL)
-        globals->upload(globalParamValues);
+    if (globals.isInitialized())
+        globals.upload(globalParamValues);
    bool useCutoff = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff);
    bool usePeriodic = (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff && force.getNonbondedMethod() != CustomNonbondedForce::CutoffNonPeriodic);
    Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
@@ -2482,9 +2408,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
            CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
            cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
        }
-        if (globals != NULL) {
-            globals->upload(globalParamValues);
-            cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
+        if (globals.isInitialized()) {
+            globals.upload(globalParamValues);
+            cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals.getDevicePointer()));
        }
    }
    info = new ForceInfo(force);
@@ -2660,8 +2586,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
        for (; indexInTileSet < 32; indexInTileSet++)
            groupData.push_back(make_int4(0, 0, 0, 0));
    }
-    interactionGroupData = CudaArray::create<int4>(cu, groupData.size(), "interactionGroupData");
-    interactionGroupData->upload(groupData);
+    interactionGroupData.initialize<int4>(cu, groupData.size(), "interactionGroupData");
+    interactionGroupData.upload(groupData);
    
    // Create the kernel.
    
@@ -2687,7 +2613,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
        args<<", const "<<buffers[i].getType()<<"* __restrict__ global_params"<<(i+1);
    for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
        args << ", const " << tableTypes[i]<< "* __restrict__ table" << i;
-    if (globals != NULL)
+    if (globals.isInitialized())
        args<<", const float* __restrict__ globals";
    if (hasParamDerivs)
        args << ", mixed* __restrict__ energyParamDerivs";
@@ -2758,7 +2684,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
 }

 double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -2767,7 +2693,7 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
            globalParamValues[i] = value;
        }
        if (changed) {
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
            if (forceCopy != NULL) {
                CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
                hasInitializedLongRangeCorrection = true;
@@ -2778,13 +2704,13 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
        CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner(), longRangeCoefficient, longRangeCoefficientDerivs);
        hasInitializedLongRangeCorrection = true;
    }
-    if (interactionGroupData != NULL) {
+    if (interactionGroupData.isInitialized()) {
        if (!hasInitializedKernel) {
            hasInitializedKernel = true;
            interactionGroupArgs.push_back(&cu.getForce().getDevicePointer());
            interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
            interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer());
-            interactionGroupArgs.push_back(&interactionGroupData->getDevicePointer());
+            interactionGroupArgs.push_back(&interactionGroupData.getDevicePointer());
            interactionGroupArgs.push_back(cu.getPeriodicBoxSizePointer());
            interactionGroupArgs.push_back(cu.getInvPeriodicBoxSizePointer());
            interactionGroupArgs.push_back(cu.getPeriodicBoxVecXPointer());
@@ -2792,10 +2718,10 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
            interactionGroupArgs.push_back(cu.getPeriodicBoxVecZPointer());
            for (auto& buffer : params->getBuffers())
                interactionGroupArgs.push_back(&buffer.getMemory());
-            for (auto function : tabulatedFunctions)
-                interactionGroupArgs.push_back(&function->getDevicePointer());
-            if (globals != NULL)
-                interactionGroupArgs.push_back(&globals->getDevicePointer());
+            for (auto& function : tabulatedFunctions)
+                interactionGroupArgs.push_back(&function.getDevicePointer());
+            if (globals.isInitialized())
+                interactionGroupArgs.push_back(&globals.getDevicePointer());
            if (hasParamDerivs)
                interactionGroupArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
        }
@@ -2855,38 +2781,24 @@ private:
    const GBSAOBCForce& force;
 };

-CudaCalcGBSAOBCForceKernel::~CudaCalcGBSAOBCForceKernel() {
-    cu.setAsCurrent();
-    if (params != NULL)
-        delete params;
-    if (bornSum != NULL)
-        delete bornSum;
-    if (bornRadii != NULL)
-        delete bornRadii;
-    if (bornForce != NULL)
-        delete bornForce;
-    if (obcChain != NULL)
-        delete obcChain;
-}
-
 void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
    cu.setAsCurrent();
    if (cu.getPlatformData().contexts.size() > 1)
        throw OpenMMException("GBSAOBCForce does not support using multiple CUDA devices");
    CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
-    params = CudaArray::create<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
+    params.initialize<float2>(cu, cu.getPaddedNumAtoms(), "gbsaObcParams");
    if (cu.getUseDoublePrecision()) {
-        bornRadii = CudaArray::create<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
-        obcChain = CudaArray::create<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
+        bornRadii.initialize<double>(cu, cu.getPaddedNumAtoms(), "bornRadii");
+        obcChain.initialize<double>(cu, cu.getPaddedNumAtoms(), "obcChain");
    }
    else {
-        bornRadii = CudaArray::create<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
-        obcChain = CudaArray::create<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
+        bornRadii.initialize<float>(cu, cu.getPaddedNumAtoms(), "bornRadii");
+        obcChain.initialize<float>(cu, cu.getPaddedNumAtoms(), "obcChain");
    }
-    bornSum = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
-    bornForce = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
-    cu.addAutoclearBuffer(*bornSum);
-    cu.addAutoclearBuffer(*bornForce);
+    bornSum.initialize<long long>(cu, cu.getPaddedNumAtoms(), "bornSum");
+    bornForce.initialize<long long>(cu, cu.getPaddedNumAtoms(), "bornForce");
+    cu.addAutoclearBuffer(bornSum);
+    cu.addAutoclearBuffer(bornForce);
    CudaArray& posq = cu.getPosq();
    vector<double4> temp(posq.getSize());
    float4* posqf = (float4*) &temp[0];
@@ -2904,7 +2816,7 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
            posqf[i] = make_float4(0, 0, 0, (float) charge);
    }
    posq.upload(&temp[0]);
-    params->upload(paramsVector);
+    params.upload(paramsVector);
    prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
    surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
    bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
@@ -2912,8 +2824,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
    cutoff = force.getCutoffDistance();
    string source = CudaKernelSources::gbsaObc2;
    nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
-    nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));
-    nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));
+    nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params.getDevicePointer()));
+    nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce.getDevicePointer()));
    info = new ForceInfo(force);
    cu.addForce(info);
 }
@@ -2951,9 +2863,9 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
        map<string, string> replacements;
        CUmodule module = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::gbsaObc1, replacements), defines);
        computeBornSumKernel = cu.getKernel(module, "computeBornSum");
-        computeSumArgs.push_back(&bornSum->getDevicePointer());
+        computeSumArgs.push_back(&bornSum.getDevicePointer());
        computeSumArgs.push_back(&cu.getPosq().getDevicePointer());
-        computeSumArgs.push_back(&params->getDevicePointer());
+        computeSumArgs.push_back(&params.getDevicePointer());
        if (nb.getUseCutoff()) {
            computeSumArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
            computeSumArgs.push_back(&nb.getInteractionCount().getDevicePointer());
@@ -2972,10 +2884,10 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
        computeSumArgs.push_back(&nb.getExclusionTiles().getDevicePointer());
        force1Kernel = cu.getKernel(module, "computeGBSAForce1");
        force1Args.push_back(&cu.getForce().getDevicePointer());
-        force1Args.push_back(&bornForce->getDevicePointer());
+        force1Args.push_back(&bornForce.getDevicePointer());
        force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
        force1Args.push_back(&cu.getPosq().getDevicePointer());
-        force1Args.push_back(&bornRadii->getDevicePointer());
+        force1Args.push_back(&bornRadii.getDevicePointer());
        force1Args.push_back(NULL);
        if (nb.getUseCutoff()) {
            force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
@@ -3008,12 +2920,12 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
    }
    cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
    float alpha = 1.0f, beta = 0.8f, gamma = 4.85f;
-    void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum->getDevicePointer(), &params->getDevicePointer(),
-            &bornRadii->getDevicePointer(), &obcChain->getDevicePointer()};
+    void* reduceSumArgs[] = {&alpha, &beta, &gamma, &bornSum.getDevicePointer(), &params.getDevicePointer(),
+            &bornRadii.getDevicePointer(), &obcChain.getDevicePointer()};
    cu.executeKernel(reduceBornSumKernel, reduceSumArgs, cu.getPaddedNumAtoms());
    cu.executeKernel(force1Kernel, &force1Args[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
-    void* reduceForceArgs[] = {&bornForce->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &params->getDevicePointer(),
-            &bornRadii->getDevicePointer(), &obcChain->getDevicePointer()};
+    void* reduceForceArgs[] = {&bornForce.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &params.getDevicePointer(),
+            &bornRadii.getDevicePointer(), &obcChain.getDevicePointer()};
    cu.executeKernel(reduceBornForceKernel, &reduceForceArgs[0], cu.getPaddedNumAtoms());
    return 0.0;
 }
@@ -3039,7 +2951,7 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
        paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
    }
    cu.setCharges(chargeVector);
-    params->upload(paramsVector);
+    params.upload(paramsVector);
    
    // Mark that the current reordering may be invalid.
    
@@ -3087,16 +2999,6 @@ CudaCalcCustomGBForceKernel::~CudaCalcCustomGBForceKernel() {
        delete energyDerivs;
    if (energyDerivChain != NULL)
        delete energyDerivChain;
-    if (longEnergyDerivs != NULL)
-        delete longEnergyDerivs;
-    if (globals != NULL)
-        delete globals;
-    if (valueBuffers != NULL)
-        delete valueBuffers;
-    for (auto function : tabulatedFunctions)
-        delete function;
-    for (auto d : dValue0dParam)
-        delete d;
    for (auto d : dValuedParam)
        delete d;
 }
@@ -3135,7 +3037,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
    params = new CudaParameterSet(cu, force.getNumPerParticleParameters(), paddedNumParticles, "customGBParameters", true);
    computedValues = new CudaParameterSet(cu, force.getNumComputedValues(), paddedNumParticles, "customGBComputedValues", true, cu.getUseDoublePrecision());
    if (force.getNumGlobalParameters() > 0)
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customGBGlobals");
    vector<vector<float> > paramVector(paddedNumParticles, vector<float>(numParams, 0));
    vector<vector<int> > exclusionList(numParticles);
    for (int i = 0; i < numParticles; i++) {
@@ -3159,6 +3061,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
    vector<pair<string, string> > functionDefinitions;
    vector<const TabulatedFunction*> functionList;
    stringstream tableArgs;
+    tabulatedFunctions.resize(force.getNumTabulatedFunctions());
    for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
        functionList.push_back(&force.getTabulatedFunction(i));
        string name = force.getTabulatedFunctionName(i);
@@ -3167,9 +3070,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
        functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
        int width;
        vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
-        tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
-        tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
-        cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[tabulatedFunctions.size()-1]->getDevicePointer()));
+        tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
+        tabulatedFunctions[i].upload(f);
+        cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(arrayName, "float", width, width*sizeof(float), tabulatedFunctions[i].getDevicePointer()));
        tableArgs << ", const float";
        if (width > 1)
            tableArgs << width;
@@ -3184,8 +3087,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
-    if (globals != NULL)
-        globals->upload(globalParamValues);
+    if (globals.isInitialized())
+        globals.upload(globalParamValues);

    // Record derivatives of expressions needed for the chain rule terms.

@@ -3233,15 +3136,16 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
        for (int j = 0; j < force.getNumEnergyParameterDerivatives(); j++)
            energyParamDerivExpressions[i].push_back(ex.differentiate(force.getEnergyParameterDerivativeName(j)).optimize());
    }
-    longEnergyDerivs = CudaArray::create<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
+    longEnergyDerivs.initialize<long long>(cu, force.getNumComputedValues()*cu.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
    energyDerivs = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
    energyDerivChain = new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "customGBEnergyDerivativeChain", true);
    int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
    needEnergyParamDerivs = (force.getNumEnergyParameterDerivatives() > 0);
+    dValue0dParam.resize(force.getNumEnergyParameterDerivatives());
    for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
        dValuedParam.push_back(new CudaParameterSet(cu, force.getNumComputedValues(), cu.getPaddedNumAtoms(), "dValuedParam", true, cu.getUseDoublePrecision()));
-        dValue0dParam.push_back(CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam"));
-        cu.addAutoclearBuffer(*dValue0dParam.back());
+        dValue0dParam[i].initialize<long long>(cu, cu.getPaddedNumAtoms(), "dValue0dParam");
+        cu.addAutoclearBuffer(dValue0dParam[i]);
        string name = force.getEnergyParameterDerivativeName(i);
        cu.addEnergyParameterDerivative(name);
    }
@@ -3826,9 +3730,9 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
                parameters.push_back(CudaNonbondedUtilities::ParameterInfo(paramName, buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
            }
        }
-        if (globals != NULL) {
-            globals->upload(globalParamValues);
-            arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
+        if (globals.isInitialized()) {
+            globals.upload(globalParamValues);
+            arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals.getDevicePointer()));
        }
        cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
        for (auto param : parameters)
@@ -3838,7 +3742,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
    }
    info = new ForceInfo(force);
    cu.addForce(info);
-    cu.addAutoclearBuffer(*longEnergyDerivs);
+    cu.addAutoclearBuffer(longEnergyDerivs);
 }

 double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
@@ -3881,13 +3785,13 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
        // Set arguments for kernels.
        
        maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : cu.getNumAtomBlocks()*(cu.getNumAtomBlocks()+1)/2);
-        valueBuffers = CudaArray::create<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
-        cu.addAutoclearBuffer(*valueBuffers);
-        cu.clearBuffer(valueBuffers->getDevicePointer(), sizeof(long long)*valueBuffers->getSize());
+        valueBuffers.initialize<long long>(cu, cu.getPaddedNumAtoms(), "customGBValueBuffers");
+        cu.addAutoclearBuffer(valueBuffers);
+        cu.clearBuffer(valueBuffers.getDevicePointer(), sizeof(long long)*valueBuffers.getSize());
        pairValueArgs.push_back(&cu.getPosq().getDevicePointer());
        pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
        pairValueArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer());
-        pairValueArgs.push_back(&valueBuffers->getDevicePointer());
+        pairValueArgs.push_back(&valueBuffers.getDevicePointer());
        if (nb.getUseCutoff()) {
            pairValueArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
            pairValueArgs.push_back(&nb.getInteractionCount().getDevicePointer());
@@ -3903,31 +3807,31 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
        }
        else
            pairValueArgs.push_back(&maxTiles);
-        if (globals != NULL)
-            pairValueArgs.push_back(&globals->getDevicePointer());
+        if (globals.isInitialized())
+            pairValueArgs.push_back(&globals.getDevicePointer());
        for (int i = 0; i < (int) params->getBuffers().size(); i++) {
            if (pairValueUsesParam[i])
                pairValueArgs.push_back(&params->getBuffers()[i].getMemory());
        }
-        for (auto d : dValue0dParam)
-            pairValueArgs.push_back(&d->getDevicePointer());
-        for (auto function : tabulatedFunctions)
-            pairValueArgs.push_back(&function->getDevicePointer());
+        for (auto& d : dValue0dParam)
+            pairValueArgs.push_back(&d.getDevicePointer());
+        for (auto& function : tabulatedFunctions)
+            pairValueArgs.push_back(&function.getDevicePointer());
        perParticleValueArgs.push_back(&cu.getPosq().getDevicePointer());
-        perParticleValueArgs.push_back(&valueBuffers->getDevicePointer());
-        if (globals != NULL)
-            perParticleValueArgs.push_back(&globals->getDevicePointer());
+        perParticleValueArgs.push_back(&valueBuffers.getDevicePointer());
+        if (globals.isInitialized())
+            perParticleValueArgs.push_back(&globals.getDevicePointer());
        for (auto& buffer : params->getBuffers())
            perParticleValueArgs.push_back(&buffer.getMemory());
        for (auto& buffer : computedValues->getBuffers())
            perParticleValueArgs.push_back(&buffer.getMemory());
        for (int i = 0; i < dValuedParam.size(); i++) {
-            perParticleValueArgs.push_back(&dValue0dParam[i]->getDevicePointer());
+            perParticleValueArgs.push_back(&dValue0dParam[i].getDevicePointer());
            for (int j = 0; j < dValuedParam[i]->getBuffers().size(); j++)
                perParticleValueArgs.push_back(&dValuedParam[i]->getBuffers()[j].getMemory());
        }
-        for (auto function : tabulatedFunctions)
-            perParticleValueArgs.push_back(&function->getDevicePointer());
+        for (auto& function : tabulatedFunctions)
+            perParticleValueArgs.push_back(&function.getDevicePointer());
        pairEnergyArgs.push_back(&cu.getForce().getDevicePointer());
        pairEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
        pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
@@ -3949,8 +3853,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
        }
        else
            pairEnergyArgs.push_back(&maxTiles);
-        if (globals != NULL)
-            pairEnergyArgs.push_back(&globals->getDevicePointer());
+        if (globals.isInitialized())
+            pairEnergyArgs.push_back(&globals.getDevicePointer());
        for (int i = 0; i < (int) params->getBuffers().size(); i++) {
            if (pairEnergyUsesParam[i])
                pairEnergyArgs.push_back(&params->getBuffers()[i].getMemory());
@@ -3959,16 +3863,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
            if (pairEnergyUsesValue[i])
                pairEnergyArgs.push_back(&computedValues->getBuffers()[i].getMemory());
        }
-        pairEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
+        pairEnergyArgs.push_back(&longEnergyDerivs.getDevicePointer());
        if (needEnergyParamDerivs)
            pairEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
-        for (auto function : tabulatedFunctions)
-            pairEnergyArgs.push_back(&function->getDevicePointer());
+        for (auto& function : tabulatedFunctions)
+            pairEnergyArgs.push_back(&function.getDevicePointer());
        perParticleEnergyArgs.push_back(&cu.getForce().getDevicePointer());
        perParticleEnergyArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
        perParticleEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
-        if (globals != NULL)
-            perParticleEnergyArgs.push_back(&globals->getDevicePointer());
+        if (globals.isInitialized())
+            perParticleEnergyArgs.push_back(&globals.getDevicePointer());
        for (auto& buffer : params->getBuffers())
            perParticleEnergyArgs.push_back(&buffer.getMemory());
        for (auto& buffer : computedValues->getBuffers())
@@ -3977,16 +3881,16 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
            perParticleEnergyArgs.push_back(&buffer.getMemory());
        for (auto& buffer : energyDerivChain->getBuffers())
            perParticleEnergyArgs.push_back(&buffer.getMemory());
-        perParticleEnergyArgs.push_back(&longEnergyDerivs->getDevicePointer());
+        perParticleEnergyArgs.push_back(&longEnergyDerivs.getDevicePointer());
        if (needEnergyParamDerivs)
            perParticleEnergyArgs.push_back(&cu.getEnergyParamDerivBuffer().getDevicePointer());
-        for (auto function : tabulatedFunctions)
-            perParticleEnergyArgs.push_back(&function->getDevicePointer());
+        for (auto& function : tabulatedFunctions)
+            perParticleEnergyArgs.push_back(&function.getDevicePointer());
        if (needParameterGradient || needEnergyParamDerivs) {
            gradientChainRuleArgs.push_back(&cu.getForce().getDevicePointer());
            gradientChainRuleArgs.push_back(&cu.getPosq().getDevicePointer());
-            if (globals != NULL)
-                gradientChainRuleArgs.push_back(&globals->getDevicePointer());
+            if (globals.isInitialized())
+                gradientChainRuleArgs.push_back(&globals.getDevicePointer());
            for (auto& buffer : params->getBuffers())
                gradientChainRuleArgs.push_back(&buffer.getMemory());
            for (auto& buffer : computedValues->getBuffers())
@@ -4001,7 +3905,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
            }
        }
    }
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -4010,7 +3914,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
            globalParamValues[i] = value;
        }
        if (changed)
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
    }
    pairEnergyArgs[5] = &includeEnergy;
    if (nb.getUseCutoff()) {
@@ -4089,8 +3993,6 @@ CudaCalcCustomExternalForceKernel::~CudaCalcCustomExternalForceKernel() {
    cu.setAsCurrent();
    if (params != NULL)
        delete params;
-    if (globals != NULL)
-        delete globals;
 }

 void CudaCalcCustomExternalForceKernel::initialize(const System& system, const CustomExternalForce& force) {
@@ -4146,9 +4048,9 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
        variables[name] = "particleParams"+params->getParameterSuffix(i);
    }
    if (force.getNumGlobalParameters() > 0) {
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
-        globals->upload(globalParamValues);
-        string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customExternalGlobals");
+        globals.upload(globalParamValues);
+        string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
        for (int i = 0; i < force.getNumGlobalParameters(); i++) {
            const string& name = force.getGlobalParameterName(i);
            string value = argName+"["+cu.intToString(i)+"]";
@@ -4170,7 +4072,7 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
 }

 double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -4179,7 +4081,7 @@ double CudaCalcCustomExternalForceKernel::execute(ContextImpl& context, bool inc
            globalParamValues[i] = value;
        }
        if (changed)
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
    }
    return 0.0;
 }
@@ -4294,18 +4196,6 @@ CudaCalcCustomHbondForceKernel::~CudaCalcCustomHbondForceKernel() {
        delete donorParams;
    if (acceptorParams != NULL)
        delete acceptorParams;
-    if (donors != NULL)
-        delete donors;
-    if (acceptors != NULL)
-        delete acceptors;
-    if (globals != NULL)
-        delete globals;
-    if (donorExclusions != NULL)
-        delete donorExclusions;
-    if (acceptorExclusions != NULL)
-        delete acceptorExclusions;
-    for (auto function : tabulatedFunctions)
-        delete function;
 }

 static void addDonorAndAcceptorCode(stringstream& computeDonor, stringstream& computeAcceptor, const string& value) {
@@ -4333,12 +4223,12 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
    if (numDonors == 0 || numAcceptors == 0)
        return;
    int numParticles = system.getNumParticles();
-    donors = CudaArray::create<int4>(cu, numDonors, "customHbondDonors");
-    acceptors = CudaArray::create<int4>(cu, numAcceptors, "customHbondAcceptors");
+    donors.initialize<int4>(cu, numDonors, "customHbondDonors");
+    acceptors.initialize<int4>(cu, numAcceptors, "customHbondAcceptors");
    donorParams = new CudaParameterSet(cu, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
    acceptorParams = new CudaParameterSet(cu, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
    if (force.getNumGlobalParameters() > 0)
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customHbondGlobals");
    vector<vector<float> > donorParamVector(numDonors);
    vector<int4> donorVector(numDonors);
    for (int i = 0; i < numDonors; i++) {
@@ -4348,7 +4238,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
        for (int j = 0; j < (int) parameters.size(); j++)
            donorParamVector[i][j] = (float) parameters[j];
    }
-    donors->upload(donorVector);
+    donors.upload(donorVector);
    donorParams->setParameterValues(donorParamVector);
    vector<vector<float> > acceptorParamVector(numAcceptors);
    vector<int4> acceptorVector(numAcceptors);
@@ -4359,7 +4249,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
        for (int j = 0; j < (int) parameters.size(); j++)
            acceptorParamVector[i][j] = (float) parameters[j];
    }
-    acceptors->upload(acceptorVector);
+    acceptors.upload(acceptorVector);
    acceptorParams->setParameterValues(acceptorParamVector);
    info = new ForceInfo(force);
    cu.addForce(info);
@@ -4395,10 +4285,10 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
        else
            throw OpenMMException("CustomHbondForce: CudaPlatform does not support more than four exclusions per acceptor");
    }
-    donorExclusions = CudaArray::create<int4>(cu, numDonors, "customHbondDonorExclusions");
-    acceptorExclusions = CudaArray::create<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
-    donorExclusions->upload(donorExclusionVector);
-    acceptorExclusions->upload(acceptorExclusionVector);
+    donorExclusions.initialize<int4>(cu, numDonors, "customHbondDonorExclusions");
+    acceptorExclusions.initialize<int4>(cu, numAcceptors, "customHbondAcceptorExclusions");
+    donorExclusions.upload(donorExclusionVector);
+    acceptorExclusions.upload(acceptorExclusionVector);

    // Record the tabulated functions.

@@ -4406,6 +4296,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
    vector<pair<string, string> > functionDefinitions;
    vector<const TabulatedFunction*> functionList;
    stringstream tableArgs;
+    tabulatedFunctions.resize(force.getNumTabulatedFunctions());
    for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
        functionList.push_back(&force.getTabulatedFunction(i));
        string name = force.getTabulatedFunctionName(i);
@@ -4414,8 +4305,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
        functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
        int width;
        vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
-        tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
-        tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
+        tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
+        tabulatedFunctions[i].upload(f);
        tableArgs << ", const float";
        if (width > 1)
            tableArgs << width;
@@ -4430,8 +4321,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
        globalParamNames[i] = force.getGlobalParameterName(i);
        globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
    }
-    if (globals != NULL)
-        globals->upload(globalParamValues);
+    if (globals.isInitialized())
+        globals.upload(globalParamValues);
    map<string, string> variables;
    for (int i = 0; i < force.getNumPerDonorParameters(); i++) {
        const string& name = force.getPerDonorParameterName(i);
@@ -4623,7 +4514,7 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
 double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (numDonors == 0 || numAcceptors == 0)
        return 0.0;
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -4632,7 +4523,7 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
            globalParamValues[i] = value;
        }
        if (changed)
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
    }
    if (!hasInitializedKernel) {
        hasInitializedKernel = true;
@@ -4640,42 +4531,42 @@ double CudaCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includ
        donorArgs.push_back(&cu.getForce().getDevicePointer());
        donorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
        donorArgs.push_back(&cu.getPosq().getDevicePointer());
-        donorArgs.push_back(&donorExclusions->getDevicePointer());
-        donorArgs.push_back(&donors->getDevicePointer());
-        donorArgs.push_back(&acceptors->getDevicePointer());
+        donorArgs.push_back(&donorExclusions.getDevicePointer());
+        donorArgs.push_back(&donors.getDevicePointer());
+        donorArgs.push_back(&acceptors.getDevicePointer());
        donorArgs.push_back(cu.getPeriodicBoxSizePointer());
        donorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
        donorArgs.push_back(cu.getPeriodicBoxVecXPointer());
        donorArgs.push_back(cu.getPeriodicBoxVecYPointer());
        donorArgs.push_back(cu.getPeriodicBoxVecZPointer());
-        if (globals != NULL)
-            donorArgs.push_back(&globals->getDevicePointer());
+        if (globals.isInitialized())
+            donorArgs.push_back(&globals.getDevicePointer());
        for (auto& buffer : donorParams->getBuffers())
            donorArgs.push_back(&buffer.getMemory());
        for (auto& buffer : acceptorParams->getBuffers())
            donorArgs.push_back(&buffer.getMemory());
-        for (auto function : tabulatedFunctions)
-            donorArgs.push_back(&function->getDevicePointer());
+        for (auto& function : tabulatedFunctions)
+            donorArgs.push_back(&function.getDevicePointer());
        index = 0;
        acceptorArgs.push_back(&cu.getForce().getDevicePointer());
        acceptorArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
        acceptorArgs.push_back(&cu.getPosq().getDevicePointer());
-        acceptorArgs.push_back(&acceptorExclusions->getDevicePointer());
-        acceptorArgs.push_back(&donors->getDevicePointer());
-        acceptorArgs.push_back(&acceptors->getDevicePointer());
+        acceptorArgs.push_back(&acceptorExclusions.getDevicePointer());
+        acceptorArgs.push_back(&donors.getDevicePointer());
+        acceptorArgs.push_back(&acceptors.getDevicePointer());
        acceptorArgs.push_back(cu.getPeriodicBoxSizePointer());
        acceptorArgs.push_back(cu.getInvPeriodicBoxSizePointer());
        acceptorArgs.push_back(cu.getPeriodicBoxVecXPointer());
        acceptorArgs.push_back(cu.getPeriodicBoxVecYPointer());
        acceptorArgs.push_back(cu.getPeriodicBoxVecZPointer());
-        if (globals != NULL)
-            acceptorArgs.push_back(&globals->getDevicePointer());
+        if (globals.isInitialized())
+            acceptorArgs.push_back(&globals.getDevicePointer());
        for (auto& buffer : donorParams->getBuffers())
            acceptorArgs.push_back(&buffer.getMemory());
        for (auto& buffer : acceptorParams->getBuffers())
            acceptorArgs.push_back(&buffer.getMemory());
-        for (auto function : tabulatedFunctions)
-            acceptorArgs.push_back(&function->getDevicePointer());
+        for (auto& function : tabulatedFunctions)
+            acceptorArgs.push_back(&function.getDevicePointer());
    }
    int sharedMemorySize = 3*CudaContext::ThreadBlockSize*sizeof(float4);
    cu.executeKernel(donorKernel, &donorArgs[0], max(numDonors, numAcceptors), CudaContext::ThreadBlockSize, sharedMemorySize);
@@ -4775,22 +4666,6 @@ CudaCalcCustomCentroidBondForceKernel::~CudaCalcCustomCentroidBondForceKernel()
    cu.setAsCurrent();
    if (params != NULL)
        delete params;
-    if (globals != NULL)
-        delete globals;
-    if (groupParticles != NULL)
-        delete groupParticles;
-    if (groupWeights != NULL)
-        delete groupWeights;
-    if (groupOffsets != NULL)
-        delete groupOffsets;
-    if (groupForces != NULL)
-        delete groupForces;
-    if (bondGroups != NULL)
-        delete bondGroups;
-    if (centerPositions != NULL)
-        delete centerPositions;
-    for (auto function : tabulatedFunctions)
-        delete function;
 }

 void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
@@ -4827,22 +4702,22 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
            for (int j = 0; j < normalizedWeights[i].size(); j++)
                groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
    }
-    groupParticles = CudaArray::create<int>(cu, groupParticleVec.size(), "groupParticles");
-    groupParticles->upload(groupParticleVec);
+    groupParticles.initialize<int>(cu, groupParticleVec.size(), "groupParticles");
+    groupParticles.upload(groupParticleVec);
    if (cu.getUseDoublePrecision()) {
-        groupWeights = CudaArray::create<double>(cu, groupParticleVec.size(), "groupWeights");
-        groupWeights->upload(groupWeightVecDouble);
-        centerPositions = CudaArray::create<double4>(cu, numGroups, "centerPositions");
+        groupWeights.initialize<double>(cu, groupParticleVec.size(), "groupWeights");
+        groupWeights.upload(groupWeightVecDouble);
+        centerPositions.initialize<double4>(cu, numGroups, "centerPositions");
    }
    else {
-        groupWeights = CudaArray::create<float>(cu, groupParticleVec.size(), "groupWeights");
-        groupWeights->upload(groupWeightVecFloat);
-        centerPositions = CudaArray::create<float4>(cu, numGroups, "centerPositions");
+        groupWeights.initialize<float>(cu, groupParticleVec.size(), "groupWeights");
+        groupWeights.upload(groupWeightVecFloat);
+        centerPositions.initialize<float4>(cu, numGroups, "centerPositions");
    }
-    groupOffsets = CudaArray::create<int>(cu, groupOffsetVec.size(), "groupOffsets");
-    groupOffsets->upload(groupOffsetVec);
-    groupForces = CudaArray::create<long long>(cu, numGroups*3, "groupForces");
-    cu.addAutoclearBuffer(*groupForces);
+    groupOffsets.initialize<int>(cu, groupOffsetVec.size(), "groupOffsets");
+    groupOffsets.upload(groupOffsetVec);
+    groupForces.initialize<long long>(cu, numGroups*3, "groupForces");
+    cu.addAutoclearBuffer(groupForces);
    
    // Record the bonds.
    
@@ -4861,15 +4736,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
            paramVector[i][j] = (float) parameters[j];
    }
    params->setParameterValues(paramVector);
-    bondGroups = CudaArray::create<int>(cu, bondGroupVec.size(), "bondGroups");
-    bondGroups->upload(bondGroupVec);
+    bondGroups.initialize<int>(cu, bondGroupVec.size(), "bondGroups");
+    bondGroups.upload(bondGroupVec);
    
    // Record the arguments to the force kernel.
    
-    groupForcesArgs.push_back(&groupForces->getDevicePointer());
+    groupForcesArgs.push_back(&groupForces.getDevicePointer());
    groupForcesArgs.push_back(NULL); // Energy buffer hasn't been created yet
-    groupForcesArgs.push_back(&centerPositions->getDevicePointer());
-    groupForcesArgs.push_back(&bondGroups->getDevicePointer());
+    groupForcesArgs.push_back(&centerPositions.getDevicePointer());
+    groupForcesArgs.push_back(&bondGroups.getDevicePointer());
    groupForcesArgs.push_back(cu.getPeriodicBoxSizePointer());
    groupForcesArgs.push_back(cu.getInvPeriodicBoxSizePointer());
    groupForcesArgs.push_back(cu.getPeriodicBoxVecXPointer());
@@ -4885,6 +4760,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
    vector<pair<string, string> > functionDefinitions;
    vector<const TabulatedFunction*> functionList;
    stringstream extraArgs;
+    tabulatedFunctions.resize(force.getNumTabulatedFunctions());
    for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
        functionList.push_back(&force.getTabulatedFunction(i));
        string name = force.getTabulatedFunctionName(i);
@@ -4893,13 +4769,13 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
        functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
        int width;
        vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
-        tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
-        tabulatedFunctions.back()->upload(f);
+        tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
+        tabulatedFunctions[i].upload(f);
        extraArgs << ", const float";
        if (width > 1)
            extraArgs << width;
        extraArgs << "* __restrict__ " << arrayName;
-        groupForcesArgs.push_back(&tabulatedFunctions.back()->getDevicePointer());
+        groupForcesArgs.push_back(&tabulatedFunctions[i].getDevicePointer());
    }
    
    // Record information about parameters.
@@ -4924,15 +4800,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
    if (needEnergyParamDerivs)
        extraArgs << ", mixed* __restrict__ energyParamDerivs";
    if (force.getNumGlobalParameters() > 0) {
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
-        globals->upload(globalParamValues);
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customCentroidBondGlobals");
+        globals.upload(globalParamValues);
        extraArgs << ", const float* __restrict__ globals";
        for (int i = 0; i < force.getNumGlobalParameters(); i++) {
            const string& name = force.getGlobalParameterName(i);
            string value = "globals["+cu.intToString(i)+"]";
            variables[name] = value;
        }
-        groupForcesArgs.push_back(&globals->getDevicePointer());
+        groupForcesArgs.push_back(&globals.getDevicePointer());
    }

    // Now to generate the kernel.  First, it needs to calculate all distances, angles,
@@ -5144,7 +5020,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
 double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    if (numBonds == 0)
        return 0.0;
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -5153,17 +5029,17 @@ double CudaCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool
            globalParamValues[i] = value;
        }
        if (changed)
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
    }
-    void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(),
-            &groupOffsets->getDevicePointer(), &centerPositions->getDevicePointer()};
+    void* computeCentersArgs[] = {&cu.getPosq().getDevicePointer(), &groupParticles.getDevicePointer(), &groupWeights.getDevicePointer(),
+            &groupOffsets.getDevicePointer(), &centerPositions.getDevicePointer()};
    cu.executeKernel(computeCentersKernel, computeCentersArgs, CudaContext::TileSize*numGroups);
    groupForcesArgs[1] = &cu.getEnergyBuffer().getDevicePointer();
    if (needEnergyParamDerivs)
        groupForcesArgs[9] = &cu.getEnergyParamDerivBuffer().getDevicePointer();
    cu.executeKernel(groupForcesKernel, &groupForcesArgs[0], numBonds);
-    void* applyForcesArgs[] = {&groupParticles->getDevicePointer(), &groupWeights->getDevicePointer(), &groupOffsets->getDevicePointer(),
-            &groupForces->getDevicePointer(), &cu.getForce().getDevicePointer()};
+    void* applyForcesArgs[] = {&groupParticles.getDevicePointer(), &groupWeights.getDevicePointer(), &groupOffsets.getDevicePointer(),
+            &groupForces.getDevicePointer(), &cu.getForce().getDevicePointer()};
    cu.executeKernel(applyForcesKernel, applyForcesArgs, CudaContext::TileSize*numGroups);
    return 0.0;
 }
@@ -5222,10 +5098,6 @@ CudaCalcCustomCompoundBondForceKernel::~CudaCalcCustomCompoundBondForceKernel()
    cu.setAsCurrent();
    if (params != NULL)
        delete params;
-    if (globals != NULL)
-        delete globals;
-    for (auto function : tabulatedFunctions)
-        delete function;
 }

 void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, const CustomCompoundBondForce& force) {
@@ -5256,16 +5128,16 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
    map<string, Lepton::CustomFunction*> functions;
    vector<pair<string, string> > functionDefinitions;
    vector<const TabulatedFunction*> functionList;
+    tabulatedFunctions.resize(force.getNumTabulatedFunctions());
    for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
        functionList.push_back(&force.getTabulatedFunction(i));
        string name = force.getTabulatedFunctionName(i);
        functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
        int width;
        vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
-        CudaArray* array = CudaArray::create<float>(cu, f.size(), "TabulatedFunction");
-        tabulatedFunctions.push_back(array);
-        array->upload(f);
-        string arrayName = cu.getBondedUtilities().addArgument(array->getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
+        tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
+        tabulatedFunctions[i].upload(f);
+        string arrayName = cu.getBondedUtilities().addArgument(tabulatedFunctions[i].getDevicePointer(), width == 1 ? "float" : "float"+cu.intToString(width));
        functionDefinitions.push_back(make_pair(name, arrayName));
    }
    
@@ -5289,9 +5161,9 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
        variables[name] = "bondParams"+params->getParameterSuffix(i);
    }
    if (force.getNumGlobalParameters() > 0) {
-        globals = CudaArray::create<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
-        globals->upload(globalParamValues);
-        string argName = cu.getBondedUtilities().addArgument(globals->getDevicePointer(), "float");
+        globals.initialize<float>(cu, force.getNumGlobalParameters(), "customCompoundBondGlobals");
+        globals.upload(globalParamValues);
+        string argName = cu.getBondedUtilities().addArgument(globals.getDevicePointer(), "float");
        for (int i = 0; i < force.getNumGlobalParameters(); i++) {
            const string& name = force.getGlobalParameterName(i);
            string value = argName+"["+cu.intToString(i)+"]";
@@ -5474,7 +5346,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
 }

 double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-    if (globals != NULL) {
+    if (globals.isInitialized()) {
        bool changed = false;
        for (int i = 0; i < (int) globalParamNames.size(); i++) {
            float value = (float) context.getParameter(globalParamNames[i]);
@@ -5483,7 +5355,7 @@ double CudaCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool
            globalParamValues[i] = value;
        }
        if (changed)
-            globals->upload(globalParamValues);
+            globals.upload(globalParamValues);
    }
    return 0.0;
 }
@@ -5553,32 +5425,6 @@ CudaCalcCustomManyParticleForceKernel::~CudaCalcCustomManyParticleForceKernel()
    cu.setAsCurrent();
    if (params != NULL)
        delete params;
-    if (orderIndex != NULL)
-        delete orderIndex;
-    if (particleOrder != NULL)
-        delete particleOrder;
-    if (particleTypes != NULL)
-        delete particleTypes;
-    if (exclusions != NULL)
-        delete exclusions;
-    if (exclusionStartIndex != NULL)
-        delete exclusionStartIndex;
-    if (blockCenter != NULL)
-        delete blockCenter;
-    if (blockBoundingBox != NULL)
-        delete blockBoundingBox;
-    if (neighborPairs != NULL)
-        delete neighborPairs;
-    if (numNeighborPairs != NULL)
-        delete numNeighborPairs;
-    if (neighborStartIndex != NULL)
-        delete neighborStartIndex;
-    if (neighbors != NULL)
-        delete neighbors;
-    if (numNeighborsForAtom != NULL)
-        delete numNeighborsForAtom;
-    for (auto function : tabulatedFunctions)
-        delete function;
 }

 void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, const CustomManyParticleForce& force) {
@@ -5612,6 +5458,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
    vector<pair<string, string> > functionDefinitions;
    vector<const TabulatedFunction*> functionList;
    stringstream tableArgs;
+    tabulatedFunctions.resize(force.getNumTabulatedFunctions());
    for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
        functionList.push_back(&force.getTabulatedFunction(i));
        string name = force.getTabulatedFunctionName(i);
@@ -5620,8 +5467,8 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
        functions[name] = cu.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
        int width;
        vector<float> f = cu.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
-        tabulatedFunctions.push_back(CudaArray::create<float>(cu, f.size(), "TabulatedFunction"));
-        tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
+        tabulatedFunctions[i].initialize<float>(cu, f.size(), "TabulatedFunction");
+        tabulatedFunctions[i].upload(f);
        tableArgs << ", const float";
        if (width > 1)
            tableArgs << width;
@@ -5667,16 +5514,16 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
    CustomManyParticleForceImpl::buildFilterArrays(force, numTypes, particleTypesVec, orderIndexVec, particleOrderVec);
    bool hasTypeFilters = (particleOrderVec.size() > 1);
    if (hasTypeFilters) {
-        particleTypes = CudaArray::create<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
-        orderIndex = CudaArray::create<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
-        particleOrder = CudaArray::create<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
-        particleTypes->upload(particleTypesVec);
-        orderIndex->upload(orderIndexVec);
-        vector<int> flattenedOrder(particleOrder->getSize());
+        particleTypes.initialize<int>(cu, particleTypesVec.size(), "customManyParticleTypes");
+        orderIndex.initialize<int>(cu, orderIndexVec.size(), "customManyParticleOrderIndex");
+        particleOrder.initialize<int>(cu, particleOrderVec.size()*particlesPerSet, "customManyParticleOrder");
+        particleTypes.upload(particleTypesVec);
+        orderIndex.upload(orderIndexVec);
+        vector<int> flattenedOrder(particleOrder.getSize());
        for (int i = 0; i < (int) particleOrderVec.size(); i++)
            for (int j = 0; j < particlesPerSet; j++)
                flattenedOrder[i*particlesPerSet+j] = particleOrderVec[i][j];
-        particleOrder->upload(flattenedOrder);
+        particleOrder.upload(flattenedOrder);
    }
    
    // Build data structures for exclusions.
@@ -5697,10 +5544,10 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
            exclusionsVec.insert(exclusionsVec.end(), particleExclusions[i].begin(), particleExclusions[i].end());
            exclusionStartIndexVec[i+1] = exclusionsVec.size();
        }
-        exclusions = CudaArray::create<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
-        exclusionStartIndex = CudaArray::create<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
-        exclusions->upload(exclusionsVec);
-        exclusionStartIndex->upload(exclusionStartIndexVec);
+        exclusions.initialize<int>(cu, exclusionsVec.size(), "customManyParticleExclusions");
+        exclusionStartIndex.initialize<int>(cu, exclusionStartIndexVec.size(), "customManyParticleExclusionStart");
+        exclusions.upload(exclusionsVec);
+        exclusionStartIndex.upload(exclusionStartIndexVec);
    }
    
    // Build data structures for the neighbor list.
@@ -5708,19 +5555,19 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
    if (nonbondedMethod != NoCutoff) {
        int numAtomBlocks = cu.getNumAtomBlocks();
        int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
-        blockCenter = new CudaArray(cu, numAtomBlocks, 4*elementSize, "blockCenter");
-        blockBoundingBox = new CudaArray(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
-        numNeighborPairs = CudaArray::create<int>(cu, 1, "customManyParticleNumNeighborPairs");
-        neighborStartIndex = CudaArray::create<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
-        numNeighborsForAtom = CudaArray::create<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
+        blockCenter.initialize(cu, numAtomBlocks, 4*elementSize, "blockCenter");
+        blockBoundingBox.initialize(cu, numAtomBlocks, 4*elementSize, "blockBoundingBox");
+        numNeighborPairs.initialize<int>(cu, 1, "customManyParticleNumNeighborPairs");
+        neighborStartIndex.initialize<int>(cu, numParticles+1, "customManyParticleNeighborStartIndex");
+        numNeighborsForAtom.initialize<int>(cu, numParticles, "customManyParticleNumNeighborsForAtom");
        CHECK_RESULT(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING), "Error creating event for CustomManyParticleForce");

        // Select a size for the array that holds the neighbor list.  We have to make a fairly
        // arbitrary guess, but if this turns out to be too small we'll increase it later.

        maxNeighborPairs = 150*numParticles;
-        neighborPairs = CudaArray::create<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
-        neighbors = CudaArray::create<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
+        neighborPairs.initialize<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
+        neighbors.initialize<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
    }

    // Now to generate the kernel.  First, it needs to calculate all distances, angles,
@@ -6045,22 +5892,22 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
        forceArgs.push_back(cu.getPeriodicBoxVecYPointer());
        forceArgs.push_back(cu.getPeriodicBoxVecZPointer());
        if (nonbondedMethod != NoCutoff) {
-            forceArgs.push_back(&neighbors->getDevicePointer());
-            forceArgs.push_back(&neighborStartIndex->getDevicePointer());
+            forceArgs.push_back(&neighbors.getDevicePointer());
+            forceArgs.push_back(&neighborStartIndex.getDevicePointer());
        }
-        if (particleTypes != NULL) {
-            forceArgs.push_back(&particleTypes->getDevicePointer());
-            forceArgs.push_back(&orderIndex->getDevicePointer());
-            forceArgs.push_back(&particleOrder->getDevicePointer());
+        if (particleTypes.isInitialized()) {
+            forceArgs.push_back(&particleTypes.getDevicePointer());
+            forceArgs.push_back(&orderIndex.getDevicePointer());
+            forceArgs.push_back(&particleOrder.getDevicePointer());
        }
-        if (exclusions != NULL) {
-            forceArgs.push_back(&exclusions->getDevicePointer());
-            forceArgs.push_back(&exclusionStartIndex->getDevicePointer());
+        if (exclusions.isInitialized()) {
+            forceArgs.push_back(&exclusions.getDevicePointer());
+            forceArgs.push_back(&exclusionStartIndex.getDevicePointer());
        }
        for (auto& buffer : params->getBuffers())
            forceArgs.push_back(&buffer.getMemory());
-        for (auto function : tabulatedFunctions)
-            forceArgs.push_back(&function->getDevicePointer());
+        for (auto& function : tabulatedFunctions)
+            forceArgs.push_back(&function.getDevicePointer());
        
        if (nonbondedMethod != NoCutoff) {
            // Set arguments for the block bounds kernel.
@@ -6071,9 +5918,9 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
            blockBoundsArgs.push_back(cu.getPeriodicBoxVecYPointer());
            blockBoundsArgs.push_back(cu.getPeriodicBoxVecZPointer());
            blockBoundsArgs.push_back(&cu.getPosq().getDevicePointer());
-            blockBoundsArgs.push_back(&blockCenter->getDevicePointer());
-            blockBoundsArgs.push_back(&blockBoundingBox->getDevicePointer());
-            blockBoundsArgs.push_back(&numNeighborPairs->getDevicePointer());
+            blockBoundsArgs.push_back(&blockCenter.getDevicePointer());
+            blockBoundsArgs.push_back(&blockBoundingBox.getDevicePointer());
+            blockBoundsArgs.push_back(&numNeighborPairs.getDevicePointer());

            // Set arguments for the neighbor list kernel.

@@ -6083,32 +5930,32 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
            neighborsArgs.push_back(cu.getPeriodicBoxVecYPointer());
            neighborsArgs.push_back(cu.getPeriodicBoxVecZPointer());
            neighborsArgs.push_back(&cu.getPosq().getDevicePointer());
-            neighborsArgs.push_back(&blockCenter->getDevicePointer());
-            neighborsArgs.push_back(&blockBoundingBox->getDevicePointer());
-            neighborsArgs.push_back(&neighborPairs->getDevicePointer());
-            neighborsArgs.push_back(&numNeighborPairs->getDevicePointer());
-            neighborsArgs.push_back(&numNeighborsForAtom->getDevicePointer());
+            neighborsArgs.push_back(&blockCenter.getDevicePointer());
+            neighborsArgs.push_back(&blockBoundingBox.getDevicePointer());
+            neighborsArgs.push_back(&neighborPairs.getDevicePointer());
+            neighborsArgs.push_back(&numNeighborPairs.getDevicePointer());
+            neighborsArgs.push_back(&numNeighborsForAtom.getDevicePointer());
            neighborsArgs.push_back(&maxNeighborPairs);
-            if (exclusions != NULL) {
-                neighborsArgs.push_back(&exclusions->getDevicePointer());
-                neighborsArgs.push_back(&exclusionStartIndex->getDevicePointer());
+            if (exclusions.isInitialized()) {
+                neighborsArgs.push_back(&exclusions.getDevicePointer());
+                neighborsArgs.push_back(&exclusionStartIndex.getDevicePointer());
            }
            
            // Set arguments for the kernel to find neighbor list start indices.
            
-            startIndicesArgs.push_back(&numNeighborsForAtom->getDevicePointer());
-            startIndicesArgs.push_back(&neighborStartIndex->getDevicePointer());
-            startIndicesArgs.push_back(&numNeighborPairs->getDevicePointer());
+            startIndicesArgs.push_back(&numNeighborsForAtom.getDevicePointer());
+            startIndicesArgs.push_back(&neighborStartIndex.getDevicePointer());
+            startIndicesArgs.push_back(&numNeighborPairs.getDevicePointer());
            startIndicesArgs.push_back(&maxNeighborPairs);

            // Set arguments for the kernel to assemble the final neighbor list.
            
-            copyPairsArgs.push_back(&neighborPairs->getDevicePointer());
-            copyPairsArgs.push_back(&neighbors->getDevicePointer());
-            copyPairsArgs.push_back(&numNeighborPairs->getDevicePointer());
+            copyPairsArgs.push_back(&neighborPairs.getDevicePointer());
+            copyPairsArgs.push_back(&neighbors.getDevicePointer());
+            copyPairsArgs.push_back(&numNeighborPairs.getDevicePointer());
            copyPairsArgs.push_back(&maxNeighborPairs);
-            copyPairsArgs.push_back(&numNeighborsForAtom->getDevicePointer());
-            copyPairsArgs.push_back(&neighborStartIndex->getDevicePointer());
+            copyPairsArgs.push_back(&numNeighborsForAtom.getDevicePointer());
+            copyPairsArgs.push_back(&neighborStartIndex.getDevicePointer());
       }
    }
    if (globalParamValues.size() > 0) {
@@ -6131,7 +5978,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
            // We need to make sure there was enough memory for the neighbor list.  Download the
            // information asynchronously so kernels can be running at the same time.

-            numNeighborPairs->download(numPairs, false);
+            numNeighborPairs.download(numPairs, false);
            CHECK_RESULT(cuEventRecord(event, 0), "Error recording event for CustomManyParticleForce");
            cu.executeKernel(startIndicesKernel, &startIndicesArgs[0], 256, 256, 256*sizeof(int));
            cu.executeKernel(copyPairsKernel, &copyPairsArgs[0], maxNeighborPairs);
@@ -6145,17 +5992,13 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
            if (*numPairs > maxNeighborPairs) {
                // Resize the arrays and run the calculation again.

-                delete neighborPairs;
-                neighborPairs = NULL;
-                delete neighbors;
-                neighbors = NULL;
                maxNeighborPairs = (int) (1.1*(*numPairs));
-                neighborPairs = CudaArray::create<int2>(cu, maxNeighborPairs, "customManyParticleNeighborPairs");
-                neighbors = CudaArray::create<int>(cu, maxNeighborPairs, "customManyParticleNeighbors");
-                forceArgs[5] = &neighbors->getDevicePointer();
-                neighborsArgs[5] = &neighborPairs->getDevicePointer();
-                copyPairsArgs[0] = &neighborPairs->getDevicePointer();
-                copyPairsArgs[1] = &neighbors->getDevicePointer();
+                neighborPairs.resize(maxNeighborPairs);
+                neighbors.resize(maxNeighborPairs);
+                forceArgs[5] = &neighbors.getDevicePointer();
+                neighborsArgs[5] = &neighborPairs.getDevicePointer();
+                copyPairsArgs[0] = &neighborPairs.getDevicePointer();
+                copyPairsArgs[1] = &neighbors.getDevicePointer();
                continue;
            }
        }