Finished converting CudaArrays.

372f1724 · Peter Eastman · f15f591b · 372f1724 · 372f1724 · 372f1724
Commit 372f1724 authored Feb 14, 2018 by Peter Eastman
10 changed files
--- a/platforms/cuda/include/CudaKernels.h
+++ b/platforms/cuda/include/CudaKernels.h
@@ -1154,12 +1154,8 @@ private:
 class CudaCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
 public:
    CudaCalcGayBerneForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcGayBerneForceKernel(name, platform), cu(cu),
-            hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), exceptionParticles(NULL),
-            exceptionParams(NULL), aMatrix(NULL),
-            bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighbors(NULL),
-            neighborIndex(NULL), neighborBlockCount(NULL), sortedPos(NULL), torque(NULL) {
+            hasInitializedKernels(false) {
    }
-    ~CudaCalcGayBerneForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -1191,25 +1187,25 @@ private:
    bool hasInitializedKernels;
    int numRealParticles, numExceptions, maxNeighborBlocks;
    GayBerneForce::NonbondedMethod nonbondedMethod;
-    CudaArray* sortedParticles;
-    CudaArray* axisParticleIndices;
-    CudaArray* sigParams;
-    CudaArray* epsParams;
-    CudaArray* scale;
-    CudaArray* exceptionParticles;
-    CudaArray* exceptionParams;
-    CudaArray* aMatrix;
-    CudaArray* bMatrix;
-    CudaArray* gMatrix;
-    CudaArray* exclusions;
-    CudaArray* exclusionStartIndex;
-    CudaArray* blockCenter;
-    CudaArray* blockBoundingBox;
-    CudaArray* neighbors;
-    CudaArray* neighborIndex;
-    CudaArray* neighborBlockCount;
-    CudaArray* sortedPos;
-    CudaArray* torque;
+    CudaArray sortedParticles;
+    CudaArray axisParticleIndices;
+    CudaArray sigParams;
+    CudaArray epsParams;
+    CudaArray scale;
+    CudaArray exceptionParticles;
+    CudaArray exceptionParams;
+    CudaArray aMatrix;
+    CudaArray bMatrix;
+    CudaArray gMatrix;
+    CudaArray exclusions;
+    CudaArray exclusionStartIndex;
+    CudaArray blockCenter;
+    CudaArray blockBoundingBox;
+    CudaArray neighbors;
+    CudaArray neighborIndex;
+    CudaArray neighborBlockCount;
+    CudaArray sortedPos;
+    CudaArray torque;
    std::vector<bool> isRealParticle;
    std::vector<std::pair<int, int> > exceptionAtoms;
    std::vector<std::pair<int, int> > excludedPairs;
@@ -1224,9 +1220,8 @@ private:
 class CudaCalcCustomCVForceKernel : public CalcCustomCVForceKernel {
 public:
    CudaCalcCustomCVForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcCustomCVForceKernel(name, platform),
-            cu(cu), hasInitializedListeners(false), invAtomOrder(NULL), innerInvAtomOrder(NULL) {
+            cu(cu), hasInitializedListeners(false) {
    }
-    ~CudaCalcCustomCVForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -1260,9 +1255,9 @@ private:
    std::vector<std::string> variableNames, paramDerivNames, globalParameterNames;
    std::vector<Lepton::ExpressionProgram> variableDerivExpressions;
    std::vector<Lepton::ExpressionProgram> paramDerivExpressions;
-    std::vector<CudaArray*> cvForces;
-    CudaArray* invAtomOrder;
-    CudaArray* innerInvAtomOrder;
+    std::vector<CudaArray> cvForces;
+    CudaArray invAtomOrder;
+    CudaArray innerInvAtomOrder;
    CUfunction copyStateKernel, copyForcesKernel, addForcesKernel;
 };

@@ -1271,10 +1266,8 @@ private:
 */
 class CudaCalcRMSDForceKernel : public CalcRMSDForceKernel {
 public:
-    CudaCalcRMSDForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcRMSDForceKernel(name, platform),
-            cu(cu), referencePos(NULL), particles(NULL), buffer(NULL) {
+    CudaCalcRMSDForceKernel(std::string name, const Platform& platform, CudaContext& cu) : CalcRMSDForceKernel(name, platform), cu(cu) {
    }
-    ~CudaCalcRMSDForceKernel();
    /**
     * Initialize the kernel.
     *
@@ -1313,9 +1306,9 @@ private:
    CudaContext& cu;
    ForceInfo* info;
    double sumNormRef;
-    CudaArray* referencePos;
-    CudaArray* particles;
-    CudaArray* buffer;
+    CudaArray referencePos;
+    CudaArray particles;
+    CudaArray buffer;
    CUfunction kernel1, kernel2;
 };

@@ -1326,7 +1319,6 @@ class CudaIntegrateVerletStepKernel : public IntegrateVerletStepKernel {
 public:
    CudaIntegrateVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVerletStepKernel(name, platform), cu(cu) {
    }
-    ~CudaIntegrateVerletStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -1358,9 +1350,8 @@ private:
 */
 class CudaIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
 public:
-    CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu), params(NULL) {
+    CudaIntegrateLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateLangevinStepKernel(name, platform), cu(cu) {
    }
-    ~CudaIntegrateLangevinStepKernel();
    /**
     * Initialize the kernel, setting up the particle masses.
     *
@@ -1385,7 +1376,7 @@ public:
 private:
    CudaContext& cu;
    double prevTemp, prevFriction, prevStepSize;
-    CudaArray* params;
+    CudaArray params;
    CUfunction kernel1, kernel2;
 };

@@ -1396,7 +1387,6 @@ class CudaIntegrateBrownianStepKernel : public IntegrateBrownianStepKernel {
 public:
    CudaIntegrateBrownianStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateBrownianStepKernel(name, platform), cu(cu) {
    }
-    ~CudaIntegrateBrownianStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -1431,7 +1421,6 @@ class CudaIntegrateVariableVerletStepKernel : public IntegrateVariableVerletStep
 public:
    CudaIntegrateVariableVerletStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableVerletStepKernel(name, platform), cu(cu) {
    }
-    ~CudaIntegrateVariableVerletStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -1466,10 +1455,8 @@ private:
 */
 class CudaIntegrateVariableLangevinStepKernel : public IntegrateVariableLangevinStepKernel {
 public:
-    CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform),
-            cu(cu), params(NULL) {
+    CudaIntegrateVariableLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateVariableLangevinStepKernel(name, platform), cu(cu) {
    }
-    ~CudaIntegrateVariableLangevinStepKernel();
    /**
     * Initialize the kernel, setting up the particle masses.
     *
@@ -1496,7 +1483,7 @@ public:
 private:
    CudaContext& cu;
    int blockSize;
-    CudaArray* params;
+    CudaArray params;
    CUfunction kernel1, kernel2, selectSizeKernel;
    double prevTemp, prevFriction, prevErrorTol;
 };
@@ -1508,8 +1495,7 @@ class CudaIntegrateCustomStepKernel : public IntegrateCustomStepKernel {
 public:
    enum GlobalTargetType {DT, VARIABLE, PARAMETER};
    CudaIntegrateCustomStepKernel(std::string name, const Platform& platform, CudaContext& cu) : IntegrateCustomStepKernel(name, platform), cu(cu),
-            hasInitializedKernels(false), localValuesAreCurrent(false), globalValues(NULL), sumBuffer(NULL), summedValue(NULL), uniformRandoms(NULL),
-            randomSeed(NULL), perDofEnergyParamDerivs(NULL), perDofValues(NULL), needsEnergyParamDerivs(false) {
+            hasInitializedKernels(false), localValuesAreCurrent(false), perDofValues(NULL), needsEnergyParamDerivs(false) {
    }
    ~CudaIntegrateCustomStepKernel();
    /**
@@ -1590,15 +1576,15 @@ private:
    int numGlobalVariables, sumWorkGroupSize;
    bool hasInitializedKernels, deviceValuesAreCurrent, deviceGlobalsAreCurrent, modifiesParameters, keNeedsForce, hasAnyConstraints, needsEnergyParamDerivs;
    mutable bool localValuesAreCurrent;
-    CudaArray* globalValues;
-    CudaArray* sumBuffer;
-    CudaArray* summedValue;
-    CudaArray* uniformRandoms;
-    CudaArray* randomSeed;
-    CudaArray* perDofEnergyParamDerivs;
-    std::vector<CudaArray*> tabulatedFunctions;
+    CudaArray globalValues;
+    CudaArray sumBuffer;
+    CudaArray summedValue;
+    CudaArray uniformRandoms;
+    CudaArray randomSeed;
+    CudaArray perDofEnergyParamDerivs;
+    std::vector<CudaArray> tabulatedFunctions;
    std::map<int, double> savedEnergy;
-    std::map<int, CudaArray*> savedForces;
+    std::map<int, CudaArray> savedForces;
    std::set<int> validSavedForces;
    CudaParameterSet* perDofValues;
    mutable std::vector<std::vector<float> > localPerDofValuesFloat;
@@ -1651,10 +1637,8 @@ public:
 */
 class CudaApplyAndersenThermostatKernel : public ApplyAndersenThermostatKernel {
 public:
-    CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyAndersenThermostatKernel(name, platform), cu(cu),
-            atomGroups(NULL) {
+    CudaApplyAndersenThermostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyAndersenThermostatKernel(name, platform), cu(cu) {
    }
-    ~CudaApplyAndersenThermostatKernel();
    /**
     * Initialize the kernel.
     *
@@ -1671,7 +1655,7 @@ public:
 private:
    CudaContext& cu;
    int randomSeed;
-    CudaArray* atomGroups;
+    CudaArray atomGroups;
    CUfunction kernel;
 };

@@ -1681,9 +1665,8 @@ private:
 class CudaApplyMonteCarloBarostatKernel : public ApplyMonteCarloBarostatKernel {
 public:
    CudaApplyMonteCarloBarostatKernel(std::string name, const Platform& platform, CudaContext& cu) : ApplyMonteCarloBarostatKernel(name, platform), cu(cu),
-            hasInitializedKernels(false), savedPositions(NULL), savedForces(NULL), moleculeAtoms(NULL), moleculeStartIndex(NULL) {
+            hasInitializedKernels(false) {
    }
-    ~CudaApplyMonteCarloBarostatKernel();
    /**
     * Initialize the kernel.
     *
@@ -1715,10 +1698,10 @@ private:
    CudaContext& cu;
    bool hasInitializedKernels;
    int numMolecules;
-    CudaArray* savedPositions;
-    CudaArray* savedForces;
-    CudaArray* moleculeAtoms;
-    CudaArray* moleculeStartIndex;
+    CudaArray savedPositions;
+    CudaArray savedForces;
+    CudaArray moleculeAtoms;
+    CudaArray moleculeStartIndex;
    CUfunction kernel;
    std::vector<int> lastAtomOrder;
 };
@@ -1728,9 +1711,8 @@ private:
 */
 class CudaRemoveCMMotionKernel : public RemoveCMMotionKernel {
 public:
-    CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaContext& cu) : RemoveCMMotionKernel(name, platform), cu(cu), cmMomentum(NULL) {
+    CudaRemoveCMMotionKernel(std::string name, const Platform& platform, CudaContext& cu) : RemoveCMMotionKernel(name, platform), cu(cu) {
    }
-    ~CudaRemoveCMMotionKernel();
    /**
     * Initialize the kernel, setting up the particle masses.
     *
@@ -1747,7 +1729,7 @@ public:
 private:
    CudaContext& cu;
    int frequency;
-    CudaArray* cmMomentum;
+    CudaArray cmMomentum;
    CUfunction kernel1, kernel2;
 };


--- a/platforms/cuda/include/CudaParallelKernels.h
+++ b/platforms/cuda/include/CudaParallelKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -84,7 +84,7 @@ private:
    std::vector<long long> completionTimes;
    std::vector<double> contextNonbondedFractions;
    int2* interactionCounts;
-    CudaArray* contextForces;
+    CudaArray contextForces;
    void* pinnedPositionBuffer;
    long long* pinnedForceBuffer;
    CUfunction sumKernel;

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
--- a/platforms/cuda/src/CudaParallelKernels.cpp
+++ b/platforms/cuda/src/CudaParallelKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -142,15 +142,13 @@ private:

 CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) :
        CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()),
-        interactionCounts(NULL), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
+        interactionCounts(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
    for (int i = 0; i < (int) data.contexts.size(); i++)
        kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
 }

 CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel() {
    data.contexts[0]->setAsCurrent();
-    if (contextForces != NULL)
-        delete contextForces;
    if (pinnedPositionBuffer != NULL)
        cuMemFreeHost(pinnedPositionBuffer);
    if (pinnedForceBuffer != NULL)
@@ -179,8 +177,8 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
 void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
    CudaContext& cu = *data.contexts[0];
    cu.setAsCurrent();
-    if (contextForces == NULL) {
-        contextForces = CudaArray::create<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
+    if (!contextForces.isInitialized()) {
+        contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
        CHECK_RESULT(cuMemHostAlloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), CU_MEMHOSTALLOC_PORTABLE), "Error allocating pinned memory");
        CHECK_RESULT(cuMemHostAlloc(&pinnedPositionBuffer, cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4)), CU_MEMHOSTALLOC_PORTABLE), "Error allocating pinned memory");
    }
@@ -211,7 +209,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        CudaContext& cu = *data.contexts[i];
        CudaContext::WorkThread& thread = cu.getWorkThread();
-        thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, *contextForces, valid, interactionCounts[i]));
+        thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, contextForces, valid, interactionCounts[i]));
    }
    data.syncContexts();
    double energy = 0.0;
@@ -222,10 +220,10 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
        
        CudaContext& cu = *data.contexts[0];
        if (!cu.getPlatformData().peerAccessSupported)
-            contextForces->upload(pinnedForceBuffer, false);
+            contextForces.upload(pinnedForceBuffer, false);
        int bufferSize = 3*cu.getPaddedNumAtoms();
        int numBuffers = data.contexts.size()-1;
-        void* args[] = {&cu.getForce().getDevicePointer(), &contextForces->getDevicePointer(), &bufferSize, &numBuffers};
+        void* args[] = {&cu.getForce().getDevicePointer(), &contextForces.getDevicePointer(), &bufferSize, &numBuffers};
        cu.executeKernel(sumKernel, args, bufferSize);
        
        // Balance work between the contexts by transferring a little nonbonded work from the context that

--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2008-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2018 Stanford University and the Authors.      *
 * Authors: Mark Friedrichs, Peter Eastman                                    *
 * Contributors:                                                              *
 *                                                                            *
@@ -48,7 +48,6 @@ public:
                                          const Platform& platform,
                                          CudaContext& cu,
                                          const System& system);
-    ~CudaCalcAmoebaBondForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -77,7 +76,7 @@ private:
    int numBonds;
    CudaContext& cu;
    const System& system;
-    CudaArray* params;
+    CudaArray params;
 };

 /**
@@ -86,7 +85,6 @@ private:
 class CudaCalcAmoebaAngleForceKernel : public CalcAmoebaAngleForceKernel {
 public:
    CudaCalcAmoebaAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
-    ~CudaCalcAmoebaAngleForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -115,7 +113,7 @@ private:
    int numAngles;
    CudaContext& cu;
    const System& system;
-    CudaArray* params;
+    CudaArray params;
 };

 /**
@@ -124,7 +122,6 @@ private:
 class CudaCalcAmoebaInPlaneAngleForceKernel : public CalcAmoebaInPlaneAngleForceKernel {
 public:
    CudaCalcAmoebaInPlaneAngleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
-    ~CudaCalcAmoebaInPlaneAngleForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -153,7 +150,7 @@ private:
    int numAngles;
    CudaContext& cu;
    const System& system;
-    CudaArray* params;
+    CudaArray params;
 };

 /**
@@ -162,7 +159,6 @@ private:
 class CudaCalcAmoebaPiTorsionForceKernel : public CalcAmoebaPiTorsionForceKernel {
 public:
    CudaCalcAmoebaPiTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
-    ~CudaCalcAmoebaPiTorsionForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -191,7 +187,7 @@ private:
    int numPiTorsions;
    CudaContext& cu;
    const System& system;
-    CudaArray* params;
+    CudaArray params;
 };

 /**
@@ -200,7 +196,6 @@ private:
 class CudaCalcAmoebaStretchBendForceKernel : public CalcAmoebaStretchBendForceKernel {
 public:
    CudaCalcAmoebaStretchBendForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
-    ~CudaCalcAmoebaStretchBendForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -229,8 +224,8 @@ private:
    int numStretchBends;
    CudaContext& cu;
    const System& system;
-    CudaArray* params1; // Equilibrium values
-    CudaArray* params2; // force constants
+    CudaArray params1; // Equilibrium values
+    CudaArray params2; // force constants
 };

 /**
@@ -239,7 +234,6 @@ private:
 class CudaCalcAmoebaOutOfPlaneBendForceKernel : public CalcAmoebaOutOfPlaneBendForceKernel {
 public:
    CudaCalcAmoebaOutOfPlaneBendForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
-    ~CudaCalcAmoebaOutOfPlaneBendForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -268,7 +262,7 @@ private:
    int numOutOfPlaneBends;
    CudaContext& cu;
    const System& system;
-    CudaArray* params;
+    CudaArray params;
 };

 /**
@@ -277,7 +271,6 @@ private:
 class CudaCalcAmoebaTorsionTorsionForceKernel : public CalcAmoebaTorsionTorsionForceKernel {
 public:
    CudaCalcAmoebaTorsionTorsionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
-    ~CudaCalcAmoebaTorsionTorsionForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -300,9 +293,9 @@ private:
    int numTorsionTorsionGrids;
    CudaContext& cu;
    const System& system;
-    CudaArray* gridValues;
-    CudaArray* gridParams;
-    CudaArray* torsionParams;
+    CudaArray gridValues;
+    CudaArray gridParams;
+    CudaArray torsionParams;
 };

 /**
@@ -414,58 +407,58 @@ private:
    const System& system;
    std::vector<int3> covalentFlagValues;
    std::vector<int2> polarizationFlagValues;
-    CudaArray* multipoleParticles;
-    CudaArray* molecularDipoles;
-    CudaArray* molecularQuadrupoles;
-    CudaArray* labFrameDipoles;
-    CudaArray* labFrameQuadrupoles;
-    CudaArray* sphericalDipoles;
-    CudaArray* sphericalQuadrupoles;
-    CudaArray* fracDipoles;
-    CudaArray* fracQuadrupoles;
-    CudaArray* field;
-    CudaArray* fieldPolar;
-    CudaArray* inducedField;
-    CudaArray* inducedFieldPolar;
-    CudaArray* torque;
-    CudaArray* dampingAndThole;
-    CudaArray* inducedDipole;
-    CudaArray* inducedDipolePolar;
-    CudaArray* inducedDipoleErrors;
-    CudaArray* prevDipoles;
-    CudaArray* prevDipolesPolar;
-    CudaArray* prevDipolesGk;
-    CudaArray* prevDipolesGkPolar;
-    CudaArray* prevErrors;
-    CudaArray* diisMatrix;
-    CudaArray* diisCoefficients;
-    CudaArray* extrapolatedDipole;
-    CudaArray* extrapolatedDipolePolar;
-    CudaArray* extrapolatedDipoleGk;
-    CudaArray* extrapolatedDipoleGkPolar;
-    CudaArray* inducedDipoleFieldGradient;
-    CudaArray* inducedDipoleFieldGradientPolar;
-    CudaArray* inducedDipoleFieldGradientGk;
-    CudaArray* inducedDipoleFieldGradientGkPolar;
-    CudaArray* extrapolatedDipoleFieldGradient;
-    CudaArray* extrapolatedDipoleFieldGradientPolar;
-    CudaArray* extrapolatedDipoleFieldGradientGk;
-    CudaArray* extrapolatedDipoleFieldGradientGkPolar;
-    CudaArray* polarizability;
-    CudaArray* covalentFlags;
-    CudaArray* polarizationGroupFlags;
-    CudaArray* pmeGrid;
-    CudaArray* pmeBsplineModuliX;
-    CudaArray* pmeBsplineModuliY;
-    CudaArray* pmeBsplineModuliZ;
-    CudaArray* pmeIgrid;
-    CudaArray* pmePhi;
-    CudaArray* pmePhid;
-    CudaArray* pmePhip;
-    CudaArray* pmePhidp;
-    CudaArray* pmeCphi;
-    CudaArray* pmeAtomRange;
-    CudaArray* lastPositions;
+    CudaArray multipoleParticles;
+    CudaArray molecularDipoles;
+    CudaArray molecularQuadrupoles;
+    CudaArray labFrameDipoles;
+    CudaArray labFrameQuadrupoles;
+    CudaArray sphericalDipoles;
+    CudaArray sphericalQuadrupoles;
+    CudaArray fracDipoles;
+    CudaArray fracQuadrupoles;
+    CudaArray field;
+    CudaArray fieldPolar;
+    CudaArray inducedField;
+    CudaArray inducedFieldPolar;
+    CudaArray torque;
+    CudaArray dampingAndThole;
+    CudaArray inducedDipole;
+    CudaArray inducedDipolePolar;
+    CudaArray inducedDipoleErrors;
+    CudaArray prevDipoles;
+    CudaArray prevDipolesPolar;
+    CudaArray prevDipolesGk;
+    CudaArray prevDipolesGkPolar;
+    CudaArray prevErrors;
+    CudaArray diisMatrix;
+    CudaArray diisCoefficients;
+    CudaArray extrapolatedDipole;
+    CudaArray extrapolatedDipolePolar;
+    CudaArray extrapolatedDipoleGk;
+    CudaArray extrapolatedDipoleGkPolar;
+    CudaArray inducedDipoleFieldGradient;
+    CudaArray inducedDipoleFieldGradientPolar;
+    CudaArray inducedDipoleFieldGradientGk;
+    CudaArray inducedDipoleFieldGradientGkPolar;
+    CudaArray extrapolatedDipoleFieldGradient;
+    CudaArray extrapolatedDipoleFieldGradientPolar;
+    CudaArray extrapolatedDipoleFieldGradientGk;
+    CudaArray extrapolatedDipoleFieldGradientGkPolar;
+    CudaArray polarizability;
+    CudaArray covalentFlags;
+    CudaArray polarizationGroupFlags;
+    CudaArray pmeGrid;
+    CudaArray pmeBsplineModuliX;
+    CudaArray pmeBsplineModuliY;
+    CudaArray pmeBsplineModuliZ;
+    CudaArray pmeIgrid;
+    CudaArray pmePhi;
+    CudaArray pmePhid;
+    CudaArray pmePhip;
+    CudaArray pmePhidp;
+    CudaArray pmeCphi;
+    CudaArray pmeAtomRange;
+    CudaArray lastPositions;
    CudaSort* sort;
    cufftHandle fft;
    CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
@@ -486,7 +479,6 @@ private:
 class CudaCalcAmoebaGeneralizedKirkwoodForceKernel : public CalcAmoebaGeneralizedKirkwoodForceKernel {
 public:
    CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
-    ~CudaCalcAmoebaGeneralizedKirkwoodForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -511,22 +503,22 @@ public:
     * Perform the final parts of the force/energy computation.
     */
    void finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles, CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags);
-    CudaArray* getBornRadii() {
+    CudaArray& getBornRadii() {
        return bornRadii;
    }
-    CudaArray* getField() {
+    CudaArray& getField() {
        return field;
    }
-    CudaArray* getInducedField() {
+    CudaArray& getInducedField() {
        return inducedField;
    }
-    CudaArray* getInducedFieldPolar() {
+    CudaArray& getInducedFieldPolar() {
        return inducedFieldPolar;
    }
-    CudaArray* getInducedDipoles() {
+    CudaArray& getInducedDipoles() {
        return inducedDipoleS;
    }
-    CudaArray* getInducedDipolesPolar() {
+    CudaArray& getInducedDipolesPolar() {
        return inducedDipolePolarS;
    }
    /**
@@ -544,15 +536,15 @@ private:
    int computeBornSumThreads, gkForceThreads, chainRuleThreads, ediffThreads;
    AmoebaMultipoleForce::PolarizationType polarizationType;
    std::map<std::string, std::string> defines;
-    CudaArray* params;
-    CudaArray* bornSum;
-    CudaArray* bornRadii;
-    CudaArray* bornForce;
-    CudaArray* field;
-    CudaArray* inducedField;
-    CudaArray* inducedFieldPolar;
-    CudaArray* inducedDipoleS;
-    CudaArray* inducedDipolePolarS;
+    CudaArray params;
+    CudaArray bornSum;
+    CudaArray bornRadii;
+    CudaArray bornForce;
+    CudaArray field;
+    CudaArray inducedField;
+    CudaArray inducedFieldPolar;
+    CudaArray inducedDipoleS;
+    CudaArray inducedDipolePolarS;
    CUfunction computeBornSumKernel, reduceBornSumKernel, surfaceAreaKernel, gkForceKernel, chainRuleKernel, ediffKernel;
 };

@@ -592,11 +584,11 @@ private:
    const System& system;
    bool hasInitializedNonbonded;
    double dispersionCoefficient;
-    CudaArray* sigmaEpsilon;
-    CudaArray* bondReductionAtoms;
-    CudaArray* bondReductionFactors;
-    CudaArray* tempPosq;
-    CudaArray* tempForces;
+    CudaArray sigmaEpsilon;
+    CudaArray bondReductionAtoms;
+    CudaArray bondReductionFactors;
+    CudaArray tempPosq;
+    CudaArray tempForces;
    CudaNonbondedUtilities* nonbonded;
    CUfunction prepareKernel, spreadKernel;
 };
@@ -607,7 +599,6 @@ private:
 class CudaCalcAmoebaWcaDispersionForceKernel : public CalcAmoebaWcaDispersionForceKernel {
 public:
    CudaCalcAmoebaWcaDispersionForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system);
-    ~CudaCalcAmoebaWcaDispersionForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -636,7 +627,7 @@ private:
    CudaContext& cu;
    const System& system;
    double totalMaximumDispersionEnergy;
-    CudaArray* radiusEpsilon;
+    CudaArray radiusEpsilon;
    CUfunction forceKernel;
 };


--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -100,14 +100,6 @@ private:
    const DrudeForce& force;
 };

-CudaCalcDrudeForceKernel::~CudaCalcDrudeForceKernel() {
-    cu.setAsCurrent();
-    if (particleParams != NULL)
-        delete particleParams;
-    if (pairParams != NULL)
-        delete pairParams;
-}
-
 void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce& force) {
    cu.setAsCurrent();
    int numContexts = cu.getPlatformData().contexts.size();
@@ -118,7 +110,7 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
        // Create the harmonic interaction .
        
        vector<vector<int> > atoms(numParticles, vector<int>(5));
-        particleParams = CudaArray::create<float4>(cu, numParticles, "drudeParticleParams");
+        particleParams.initialize<float4>(cu, numParticles, "drudeParticleParams");
        vector<float4> paramVector(numParticles);
        for (int i = 0; i < numParticles; i++) {
            double charge, polarizability, aniso12, aniso34;
@@ -140,9 +132,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
            }
            paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
        }
-        particleParams->upload(paramVector);
+        particleParams.upload(paramVector);
        map<string, string> replacements;
-        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams->getDevicePointer(), "float4");
+        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(particleParams.getDevicePointer(), "float4");
        cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudeParticleForce, replacements), force.getForceGroup());
    }
    int startPairIndex = cu.getContextIndex()*force.getNumScreenedPairs()/numContexts;
@@ -152,7 +144,7 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
        // Create the screened interaction between dipole pairs.
        
        vector<vector<int> > atoms(numPairs, vector<int>(4));
-        pairParams = CudaArray::create<float2>(cu, numPairs, "drudePairParams");
+        pairParams.initialize<float2>(cu, numPairs, "drudePairParams");
        vector<float2> paramVector(numPairs);
        for (int i = 0; i < numPairs; i++) {
            int drude1, drude2;
@@ -166,9 +158,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
            double energyScale = ONE_4PI_EPS0*charge1*charge2;
            paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
        }
-        pairParams->upload(paramVector);
+        pairParams.upload(paramVector);
        map<string, string> replacements;
-        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams->getDevicePointer(), "float2");
+        replacements["PARAMS"] = cu.getBondedUtilities().addArgument(pairParams.getDevicePointer(), "float2");
        cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaDrudeKernelSources::drudePairForce, replacements), force.getForceGroup());
    }
    cu.addForce(new CudaDrudeForceInfo(force));
@@ -187,7 +179,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
    int endParticleIndex = (cu.getContextIndex()+1)*force.getNumParticles()/numContexts;
    int numParticles = endParticleIndex-startParticleIndex;
    if (numParticles > 0) {
-        if (particleParams == NULL || numParticles != particleParams->getSize())
+        if (!particleParams.isInitialized() || numParticles != particleParams.getSize())
            throw OpenMMException("updateParametersInContext: The number of Drude particles has changed");
        vector<float4> paramVector(numParticles);
        for (int i = 0; i < numParticles; i++) {
@@ -206,7 +198,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
                k2 = 0;
            paramVector[i] = make_float4((float) k1, (float) k2, (float) k3, 0.0f);
        }
-        particleParams->upload(paramVector);
+        particleParams.upload(paramVector);
    }
    
    // Set the pair parameters.
@@ -215,7 +207,7 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
    int endPairIndex = (cu.getContextIndex()+1)*force.getNumScreenedPairs()/numContexts;
    int numPairs = endPairIndex-startPairIndex;
    if (numPairs > 0) {
-        if (pairParams == NULL || numPairs != pairParams->getSize())
+        if (!pairParams.isInitialized() || numPairs != pairParams.getSize())
            throw OpenMMException("updateParametersInContext: The number of screened pairs has changed");
        vector<float2> paramVector(numPairs);
        for (int i = 0; i < numPairs; i++) {
@@ -230,17 +222,10 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
            double energyScale = ONE_4PI_EPS0*charge1*charge2;
            paramVector[i] = make_float2((float) screeningScale, (float) energyScale);
        }
-        pairParams->upload(paramVector);
+        pairParams.upload(paramVector);
    }
 }

-CudaIntegrateDrudeLangevinStepKernel::~CudaIntegrateDrudeLangevinStepKernel() {
-    if (normalParticles != NULL)
-        delete normalParticles;
-    if (pairParticles != NULL)
-        delete pairParticles;
-}
-
 void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, const DrudeLangevinIntegrator& integrator, const DrudeForce& force) {
    cu.getPlatformData().initializeContexts(system);
    cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
@@ -261,12 +246,12 @@ void CudaIntegrateDrudeLangevinStepKernel::initialize(const System& system, cons
        pairParticleVec.push_back(make_int2(p, p1));
    }
    normalParticleVec.insert(normalParticleVec.begin(), particles.begin(), particles.end());
-    normalParticles = CudaArray::create<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
-    pairParticles = CudaArray::create<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles");
+    normalParticles.initialize<int>(cu, max((int) normalParticleVec.size(), 1), "drudeNormalParticles");
+    pairParticles.initialize<int2>(cu, max((int) pairParticleVec.size(), 1), "drudePairParticles");
    if (normalParticleVec.size() > 0)
-        normalParticles->upload(normalParticleVec);
+        normalParticles.upload(normalParticleVec);
    if (pairParticleVec.size() > 0)
-        pairParticles->upload(pairParticleVec);
+        pairParticles.upload(pairParticleVec);

    // Create kernels.
    
@@ -345,9 +330,9 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D

    // Call the first integration kernel.

-    int randomIndex = integration.prepareRandomNumbers(normalParticles->getSize()+2*pairParticles->getSize());
+    int randomIndex = integration.prepareRandomNumbers(normalParticles.getSize()+2*pairParticles.getSize());
    void* args1[] = {&cu.getVelm().getDevicePointer(), &cu.getForce().getDevicePointer(), &integration.getPosDelta().getDevicePointer(),
-            &normalParticles->getDevicePointer(), &pairParticles->getDevicePointer(), &integration.getStepSize().getDevicePointer(),
+            &normalParticles.getDevicePointer(), &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(),
            vscalePtr, fscalePtr, noisescalePtr, vscaleDrudePtr, fscaleDrudePtr, noisescaleDrudePtr, &integration.getRandom().getDevicePointer(), &randomIndex};
    cu.executeKernel(kernel1, args1, numAtoms);

@@ -366,8 +351,8 @@ void CudaIntegrateDrudeLangevinStepKernel::execute(ContextImpl& context, const D
    
    if (maxDrudeDistance > 0) {
        void* hardwallArgs[] = {&cu.getPosq().getDevicePointer(), &posCorrection, &cu.getVelm().getDevicePointer(),
-                &pairParticles->getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr};
-        cu.executeKernel(hardwallKernel, hardwallArgs, pairParticles->getSize());
+                &pairParticles.getDevicePointer(), &integration.getStepSize().getDevicePointer(), maxDrudeDistancePtr, hardwallscaleDrudePtr};
+        cu.executeKernel(hardwallKernel, hardwallArgs, pairParticles.getSize());
    }
    integration.computeVirtualSites();


--- a/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
+++ b/plugins/drude/platforms/cuda/src/CudaDrudeKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -45,9 +45,8 @@ namespace OpenMM {
 class CudaCalcDrudeForceKernel : public CalcDrudeForceKernel {
 public:
    CudaCalcDrudeForceKernel(std::string name, const Platform& platform, CudaContext& cu) :
-            CalcDrudeForceKernel(name, platform), cu(cu), particleParams(NULL), pairParams(NULL) {
+            CalcDrudeForceKernel(name, platform), cu(cu) {
    }
-    ~CudaCalcDrudeForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -73,8 +72,8 @@ public:
    void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
 private:
    CudaContext& cu;
-    CudaArray* particleParams;
-    CudaArray* pairParams;
+    CudaArray particleParams;
+    CudaArray pairParams;
 };

 /**
@@ -83,9 +82,8 @@ private:
 class CudaIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
 public:
    CudaIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
-            IntegrateDrudeLangevinStepKernel(name, platform), cu(cu), normalParticles(NULL), pairParticles(NULL) {
+            IntegrateDrudeLangevinStepKernel(name, platform), cu(cu) {
    }
-    ~CudaIntegrateDrudeLangevinStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -111,8 +109,8 @@ public:
 private:
    CudaContext& cu;
    double prevStepSize;
-    CudaArray* normalParticles;
-    CudaArray* pairParticles;
+    CudaArray normalParticles;
+    CudaArray pairParticles;
    CUfunction kernel1, kernel2, hardwallKernel;
 };


--- a/plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+++ b/plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) {
    }
 }

-CudaIntegrateRPMDStepKernel::~CudaIntegrateRPMDStepKernel() {
-    if (forces != NULL)
-        delete forces;
-    if (positions != NULL)
-        delete positions;
-    if (velocities != NULL)
-        delete velocities;
-    if (contractedForces != NULL)
-        delete contractedForces;
-    if (contractedPositions != NULL)
-        delete contractedPositions;
-}
-
 void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
    cu.getPlatformData().initializeContexts(system);
    numCopies = integrator.getNumCopies();
@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
    int paddedParticles = cu.getPaddedNumAtoms();
    bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
    int elementSize = (useDoublePrecision ? sizeof(double4) : sizeof(float4));
-    forces = CudaArray::create<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
-    positions = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
-    velocities = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
+    forces.initialize<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
+    positions.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
+    velocities.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
    cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
    
    // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
    
    if (useDoublePrecision) {
-        vector<double4> temp(positions->getSize());
-        for (int i = 0; i < positions->getSize(); i++)
+        vector<double4> temp(positions.getSize());
+        for (int i = 0; i < positions.getSize(); i++)
            temp[i] = make_double4(0, 0, 0, 0);
-        positions->upload(temp);
-        for (int i = 0; i < velocities->getSize(); i++)
+        positions.upload(temp);
+        for (int i = 0; i < velocities.getSize(); i++)
            temp[i] = make_double4(0, 0, 0, 1);
-        velocities->upload(temp);
+        velocities.upload(temp);
    }
    else {
-        vector<float4> temp(positions->getSize());
-        for (int i = 0; i < positions->getSize(); i++)
+        vector<float4> temp(positions.getSize());
+        for (int i = 0; i < positions.getSize(); i++)
            temp[i] = make_float4(0, 0, 0, 0);
-        positions->upload(temp);
-        for (int i = 0; i < velocities->getSize(); i++)
+        positions.upload(temp);
+        for (int i = 0; i < velocities.getSize(); i++)
            temp[i] = make_float4(0, 0, 0, 1);
-        velocities->upload(temp);
+        velocities.upload(temp);
    }
    
    // Build a list of contractions.
@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
        }
    }
    if (maxContractedCopies > 0) {
-        contractedForces = CudaArray::create<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
-        contractedPositions = new CudaArray(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
+        contractedForces.initialize<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
+        contractedPositions.initialize(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
    }

    // Create kernels.
@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
    float frictionFloat = (float) friction;
    void* frictionPtr = (useDoublePrecision ? (void*) &friction : (void*) &frictionFloat);
    int randomIndex = integration.prepareRandomNumbers(numParticles*numCopies);
-    void* pileArgs[] = {&velocities->getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
+    void* pileArgs[] = {&velocities.getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
    if (integrator.getApplyThermostat())
        cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize);

    // Update positions and velocities.
    
-    void* stepArgs[] = {&positions->getDevicePointer(), &velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr, kTPtr};
+    void* stepArgs[] = {&positions.getDevicePointer(), &velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr, kTPtr};
    cu.executeKernel(stepKernel, stepArgs, numParticles*numCopies, workgroupSize);

    // Calculate forces based on the updated positions.
@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
    
    // Update velocities.

-    void* velocitiesArgs[] = {&velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr};
+    void* velocitiesArgs[] = {&velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr};
    cu.executeKernel(velocitiesKernel, velocitiesArgs, numParticles*numCopies, workgroupSize);

    // Apply the PILE-L thermostat again.
@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
        // the same translation to all the beads.

        int i = numCopies-1;
-        void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
+        void* args[] = {&positions.getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(translateKernel, args, cu.getNumAtoms());
    }
 }
@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
    // Compute forces from all groups that didn't have a specified contraction.

    for (int i = 0; i < numCopies; i++) {
-        void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
+        void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
                &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
        context.computeVirtualSites();
@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
        if (initialBox[0] != finalBox[0] || initialBox[1] != finalBox[1] || initialBox[2] != finalBox[2])
            throw OpenMMException("Standard barostats cannot be used with RPMDIntegrator.  Use RPMDMonteCarloBarostat instead.");
        context.calcForcesAndEnergy(true, false, groupsNotContracted);
-        void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
-                &velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
+        void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
+                &velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
    }
    
@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
        
        // Find the contracted positions.
        
-        void* contractPosArgs[] = {&positions->getDevicePointer(), &contractedPositions->getDevicePointer()};
+        void* contractPosArgs[] = {&positions.getDevicePointer(), &contractedPositions.getDevicePointer()};
        cu.executeKernel(positionContractionKernels[copies], contractPosArgs, numParticles*numCopies, workgroupSize);

        // Compute forces.

        for (int i = 0; i < copies; i++) {
-            void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions->getDevicePointer(),
+            void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions.getDevicePointer(),
                    &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
            cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
            context.computeVirtualSites();
            context.calcForcesAndEnergy(true, false, groupFlags);
-            void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
-                   &velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
+            void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
+                   &velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
            cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
        }
        
        // Apply the forces to the original copies.
        
-        void* contractForceArgs[] = {&forces->getDevicePointer(), &contractedForces->getDevicePointer()};
+        void* contractForceArgs[] = {&forces.getDevicePointer(), &contractedForces.getDevicePointer()};
        cu.executeKernel(forceContractionKernels[copies], contractForceArgs, numParticles*numCopies, workgroupSize);
    }
    if (groupsByCopies.size() > 0) {
        // Ensure the Context contains the positions from the last copy, since we'll assume that later.
        
        int i = numCopies-1;
-        void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
+        void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
                &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
    }
@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c
 }

 void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
-    if (positions == NULL)
+    if (!positions.isInitialized())
        throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
    if (pos.size() != numParticles)
        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
        cu.getPosq().download(posq);
        for (int i = 0; i < numParticles; i++)
            posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
-        result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
+        result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
    }
    else if (cu.getUseMixedPrecision()) {
        vector<float4> posqf(cu.getPaddedNumAtoms());
@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
        vector<double4> posq(cu.getPaddedNumAtoms());
        for (int i = 0; i < numParticles; i++)
            posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
-        result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
+        result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
    }
    else {
        vector<float4> posq(cu.getPaddedNumAtoms());
        cu.getPosq().download(posq);
        for (int i = 0; i < numParticles; i++)
            posq[i] = make_float4((float) offsetPos[i][0], (float) offsetPos[i][1], (float) offsetPos[i][2], posq[i].w);
-        result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
+        result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
    }
    if (result != CUDA_SUCCESS) {
        std::stringstream str;
-        str<<"Error uploading array "<<positions->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
+        str<<"Error uploading array "<<positions.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
        throw OpenMMException(str.str());
    }
 }

 void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
-    if (velocities == NULL)
+    if (!velocities.isInitialized())
        throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
    if (vel.size() != numParticles)
        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve
        cu.getVelm().download(velm);
        for (int i = 0; i < numParticles; i++)
            velm[i] = make_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
-        result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
+        result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
    }
    else {
        vector<float4> velm(cu.getPaddedNumAtoms());
        cu.getVelm().download(velm);
        for (int i = 0; i < numParticles; i++)
            velm[i] = make_float4((float) vel[i][0], (float) vel[i][1], (float) vel[i][2], velm[i].w);
-        result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
+        result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
    }
    if (result != CUDA_SUCCESS) {
        std::stringstream str;
-        str<<"Error uploading array "<<velocities->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
+        str<<"Error uploading array "<<velocities.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
        throw OpenMMException(str.str());
    }
 }

 void CudaIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
-    void* copyArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
+    void* copyArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
            &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &copy};
    cu.executeKernel(copyToContextKernel, copyArgs, cu.getNumAtoms());
 }

--- a/plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
+++ b/plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -46,9 +46,8 @@ namespace OpenMM {
 class CudaIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
 public:
    CudaIntegrateRPMDStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
-            IntegrateRPMDStepKernel(name, platform), cu(cu), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) {
+            IntegrateRPMDStepKernel(name, platform), cu(cu) {
    }
-    ~CudaIntegrateRPMDStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -91,11 +90,11 @@ private:
    int numCopies, numParticles, workgroupSize;
    std::map<int, int> groupsByCopies;
    int groupsNotContracted;
-    CudaArray* forces;
-    CudaArray* positions;
-    CudaArray* velocities;
-    CudaArray* contractedForces;
-    CudaArray* contractedPositions;
+    CudaArray forces;
+    CudaArray positions;
+    CudaArray velocities;
+    CudaArray contractedForces;
+    CudaArray contractedPositions;
    CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
    std::map<int, CUfunction> positionContractionKernels;
    std::map<int, CUfunction> forceContractionKernels;