Merge pull request #1992 from peastman/arrays

Simplification to CudaArray and OpenCLArray

Merge pull request #1992 from peastman/arrays
Simplification to CudaArray and OpenCLArray
d9756688 · peastman · GitHub · 979525c7 · c75dba47 · d9756688
Unverified Commit d9756688 authored Feb 16, 2018 by peastman Committed by GitHub Feb 16, 2018
5 changed files
--- a/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
+++ b/plugins/drude/platforms/opencl/src/OpenCLDrudeKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2013-2015 Stanford University and the Authors.      *
+ * Portions copyright (c) 2013-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -45,9 +45,8 @@ namespace OpenMM {
 class OpenCLCalcDrudeForceKernel : public CalcDrudeForceKernel {
 public:
    OpenCLCalcDrudeForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
-            CalcDrudeForceKernel(name, platform), cl(cl), particleParams(NULL), pairParams(NULL) {
+            CalcDrudeForceKernel(name, platform), cl(cl) {
    }
-    ~OpenCLCalcDrudeForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -73,8 +72,8 @@ public:
    void copyParametersToContext(ContextImpl& context, const DrudeForce& force);
 private:
    OpenCLContext& cl;
-    OpenCLArray* particleParams;
-    OpenCLArray* pairParams;
+    OpenCLArray particleParams;
+    OpenCLArray pairParams;
 };

 /**
@@ -83,9 +82,8 @@ private:
 class OpenCLIntegrateDrudeLangevinStepKernel : public IntegrateDrudeLangevinStepKernel {
 public:
    OpenCLIntegrateDrudeLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
-            IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false), normalParticles(NULL), pairParticles(NULL) {
+            IntegrateDrudeLangevinStepKernel(name, platform), cl(cl), hasInitializedKernels(false) {
    }
-    ~OpenCLIntegrateDrudeLangevinStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -112,8 +110,8 @@ private:
    OpenCLContext& cl;
    bool hasInitializedKernels;
    double prevStepSize;
-    OpenCLArray* normalParticles;
-    OpenCLArray* pairParticles;
+    OpenCLArray normalParticles;
+    OpenCLArray pairParticles;
    cl::Kernel kernel1, kernel2, hardwallKernel;
 };


--- a/plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+++ b/plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -62,19 +62,6 @@ static int findFFTDimension(int minimum) {
    }
 }

-CudaIntegrateRPMDStepKernel::~CudaIntegrateRPMDStepKernel() {
-    if (forces != NULL)
-        delete forces;
-    if (positions != NULL)
-        delete positions;
-    if (velocities != NULL)
-        delete velocities;
-    if (contractedForces != NULL)
-        delete contractedForces;
-    if (contractedPositions != NULL)
-        delete contractedPositions;
-}
-
 void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
    cu.getPlatformData().initializeContexts(system);
    numCopies = integrator.getNumCopies();
@@ -85,30 +72,30 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
    int paddedParticles = cu.getPaddedNumAtoms();
    bool useDoublePrecision = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision());
    int elementSize = (useDoublePrecision ? sizeof(double4) : sizeof(float4));
-    forces = CudaArray::create<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
-    positions = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
-    velocities = new CudaArray(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
+    forces.initialize<long long>(cu, numCopies*paddedParticles*3, "rpmdForces");
+    positions.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdPositions");
+    velocities.initialize(cu, numCopies*paddedParticles, elementSize, "rpmdVelocities");
    cu.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
    
    // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
    
    if (useDoublePrecision) {
-        vector<double4> temp(positions->getSize());
-        for (int i = 0; i < positions->getSize(); i++)
+        vector<double4> temp(positions.getSize());
+        for (int i = 0; i < positions.getSize(); i++)
            temp[i] = make_double4(0, 0, 0, 0);
-        positions->upload(temp);
-        for (int i = 0; i < velocities->getSize(); i++)
+        positions.upload(temp);
+        for (int i = 0; i < velocities.getSize(); i++)
            temp[i] = make_double4(0, 0, 0, 1);
-        velocities->upload(temp);
+        velocities.upload(temp);
    }
    else {
-        vector<float4> temp(positions->getSize());
-        for (int i = 0; i < positions->getSize(); i++)
+        vector<float4> temp(positions.getSize());
+        for (int i = 0; i < positions.getSize(); i++)
            temp[i] = make_float4(0, 0, 0, 0);
-        positions->upload(temp);
-        for (int i = 0; i < velocities->getSize(); i++)
+        positions.upload(temp);
+        for (int i = 0; i < velocities.getSize(); i++)
            temp[i] = make_float4(0, 0, 0, 1);
-        velocities->upload(temp);
+        velocities.upload(temp);
    }
    
    // Build a list of contractions.
@@ -137,8 +124,8 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
        }
    }
    if (maxContractedCopies > 0) {
-        contractedForces = CudaArray::create<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
-        contractedPositions = new CudaArray(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
+        contractedForces.initialize<long long>(cu, maxContractedCopies*paddedParticles*3, "rpmdContractedForces");
+        contractedPositions.initialize(cu, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
    }

    // Create kernels.
@@ -204,13 +191,13 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
    float frictionFloat = (float) friction;
    void* frictionPtr = (useDoublePrecision ? (void*) &friction : (void*) &frictionFloat);
    int randomIndex = integration.prepareRandomNumbers(numParticles*numCopies);
-    void* pileArgs[] = {&velocities->getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
+    void* pileArgs[] = {&velocities.getDevicePointer(), &integration.getRandom().getDevicePointer(), &randomIndex, dtPtr, kTPtr, frictionPtr};
    if (integrator.getApplyThermostat())
        cu.executeKernel(pileKernel, pileArgs, numParticles*numCopies, workgroupSize);

    // Update positions and velocities.
    
-    void* stepArgs[] = {&positions->getDevicePointer(), &velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr, kTPtr};
+    void* stepArgs[] = {&positions.getDevicePointer(), &velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr, kTPtr};
    cu.executeKernel(stepKernel, stepArgs, numParticles*numCopies, workgroupSize);

    // Calculate forces based on the updated positions.
@@ -219,7 +206,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
    
    // Update velocities.

-    void* velocitiesArgs[] = {&velocities->getDevicePointer(), &forces->getDevicePointer(), dtPtr};
+    void* velocitiesArgs[] = {&velocities.getDevicePointer(), &forces.getDevicePointer(), dtPtr};
    cu.executeKernel(velocitiesKernel, velocitiesArgs, numParticles*numCopies, workgroupSize);

    // Apply the PILE-L thermostat again.
@@ -239,7 +226,7 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
        // the same translation to all the beads.

        int i = numCopies-1;
-        void* args[] = {&positions->getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
+        void* args[] = {&positions.getDevicePointer(), &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(translateKernel, args, cu.getNumAtoms());
    }
 }
@@ -248,7 +235,7 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
    // Compute forces from all groups that didn't have a specified contraction.

    for (int i = 0; i < numCopies; i++) {
-        void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
+        void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
                &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
        context.computeVirtualSites();
@@ -260,8 +247,8 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
        if (initialBox[0] != finalBox[0] || initialBox[1] != finalBox[1] || initialBox[2] != finalBox[2])
            throw OpenMMException("Standard barostats cannot be used with RPMDIntegrator.  Use RPMDMonteCarloBarostat instead.");
        context.calcForcesAndEnergy(true, false, groupsNotContracted);
-        void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
-                &velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
+        void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &forces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
+                &velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &positions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
    }
    
@@ -273,32 +260,32 @@ void CudaIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
        
        // Find the contracted positions.
        
-        void* contractPosArgs[] = {&positions->getDevicePointer(), &contractedPositions->getDevicePointer()};
+        void* contractPosArgs[] = {&positions.getDevicePointer(), &contractedPositions.getDevicePointer()};
        cu.executeKernel(positionContractionKernels[copies], contractPosArgs, numParticles*numCopies, workgroupSize);

        // Compute forces.

        for (int i = 0; i < copies; i++) {
-            void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions->getDevicePointer(),
+            void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &contractedPositions.getDevicePointer(),
                    &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
            cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
            context.computeVirtualSites();
            context.calcForcesAndEnergy(true, false, groupFlags);
-            void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces->getDevicePointer(), &cu.getVelm().getDevicePointer(),
-                   &velocities->getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions->getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
+            void* copyFromContextArgs[] = {&cu.getForce().getDevicePointer(), &contractedForces.getDevicePointer(), &cu.getVelm().getDevicePointer(),
+                   &velocities.getDevicePointer(), &cu.getPosq().getDevicePointer(), &contractedPositions.getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
            cu.executeKernel(copyFromContextKernel, copyFromContextArgs, cu.getNumAtoms());
        }
        
        // Apply the forces to the original copies.
        
-        void* contractForceArgs[] = {&forces->getDevicePointer(), &contractedForces->getDevicePointer()};
+        void* contractForceArgs[] = {&forces.getDevicePointer(), &contractedForces.getDevicePointer()};
        cu.executeKernel(forceContractionKernels[copies], contractForceArgs, numParticles*numCopies, workgroupSize);
    }
    if (groupsByCopies.size() > 0) {
        // Ensure the Context contains the positions from the last copy, since we'll assume that later.
        
        int i = numCopies-1;
-        void* copyToContextArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
+        void* copyToContextArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
                &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &i};
        cu.executeKernel(copyToContextKernel, copyToContextArgs, cu.getNumAtoms());
    }
@@ -309,7 +296,7 @@ double CudaIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context, c
 }

 void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
-    if (positions == NULL)
+    if (!positions.isInitialized())
        throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
    if (pos.size() != numParticles)
        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
@@ -332,7 +319,7 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
        cu.getPosq().download(posq);
        for (int i = 0; i < numParticles; i++)
            posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
-        result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
+        result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
    }
    else if (cu.getUseMixedPrecision()) {
        vector<float4> posqf(cu.getPaddedNumAtoms());
@@ -340,24 +327,24 @@ void CudaIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos
        vector<double4> posq(cu.getPaddedNumAtoms());
        for (int i = 0; i < numParticles; i++)
            posq[i] = make_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
-        result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
+        result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &posq[0], numParticles*sizeof(double4));
    }
    else {
        vector<float4> posq(cu.getPaddedNumAtoms());
        cu.getPosq().download(posq);
        for (int i = 0; i < numParticles; i++)
            posq[i] = make_float4((float) offsetPos[i][0], (float) offsetPos[i][1], (float) offsetPos[i][2], posq[i].w);
-        result = cuMemcpyHtoD(positions->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
+        result = cuMemcpyHtoD(positions.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &posq[0], numParticles*sizeof(float4));
    }
    if (result != CUDA_SUCCESS) {
        std::stringstream str;
-        str<<"Error uploading array "<<positions->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
+        str<<"Error uploading array "<<positions.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
        throw OpenMMException(str.str());
    }
 }

 void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
-    if (velocities == NULL)
+    if (!velocities.isInitialized())
        throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
    if (vel.size() != numParticles)
        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
@@ -367,24 +354,24 @@ void CudaIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ve
        cu.getVelm().download(velm);
        for (int i = 0; i < numParticles; i++)
            velm[i] = make_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
-        result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
+        result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(double4), &velm[0], numParticles*sizeof(double4));
    }
    else {
        vector<float4> velm(cu.getPaddedNumAtoms());
        cu.getVelm().download(velm);
        for (int i = 0; i < numParticles; i++)
            velm[i] = make_float4((float) vel[i][0], (float) vel[i][1], (float) vel[i][2], velm[i].w);
-        result = cuMemcpyHtoD(velocities->getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
+        result = cuMemcpyHtoD(velocities.getDevicePointer()+copy*cu.getPaddedNumAtoms()*sizeof(float4), &velm[0], numParticles*sizeof(float4));
    }
    if (result != CUDA_SUCCESS) {
        std::stringstream str;
-        str<<"Error uploading array "<<velocities->getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
+        str<<"Error uploading array "<<velocities.getName()<<": "<<CudaContext::getErrorString(result)<<" ("<<result<<")";
        throw OpenMMException(str.str());
    }
 }

 void CudaIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
-    void* copyArgs[] = {&velocities->getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions->getDevicePointer(),
+    void* copyArgs[] = {&velocities.getDevicePointer(), &cu.getVelm().getDevicePointer(), &positions.getDevicePointer(),
            &cu.getPosq().getDevicePointer(), &cu.getAtomIndexArray().getDevicePointer(), &copy};
    cu.executeKernel(copyToContextKernel, copyArgs, cu.getNumAtoms());
 }

--- a/plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
+++ b/plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -46,9 +46,8 @@ namespace OpenMM {
 class CudaIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
 public:
    CudaIntegrateRPMDStepKernel(std::string name, const Platform& platform, CudaContext& cu) :
-            IntegrateRPMDStepKernel(name, platform), cu(cu), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) {
+            IntegrateRPMDStepKernel(name, platform), cu(cu) {
    }
-    ~CudaIntegrateRPMDStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -91,11 +90,11 @@ private:
    int numCopies, numParticles, workgroupSize;
    std::map<int, int> groupsByCopies;
    int groupsNotContracted;
-    CudaArray* forces;
-    CudaArray* positions;
-    CudaArray* velocities;
-    CudaArray* contractedForces;
-    CudaArray* contractedPositions;
+    CudaArray forces;
+    CudaArray positions;
+    CudaArray velocities;
+    CudaArray contractedForces;
+    CudaArray contractedPositions;
    CUfunction pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
    std::map<int, CUfunction> positionContractionKernels;
    std::map<int, CUfunction> forceContractionKernels;

--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
@@ -6,7 +6,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -41,19 +41,6 @@
 using namespace OpenMM;
 using namespace std;

-OpenCLIntegrateRPMDStepKernel::~OpenCLIntegrateRPMDStepKernel() {
-    if (forces != NULL)
-        delete forces;
-    if (positions != NULL)
-        delete positions;
-    if (velocities != NULL)
-        delete velocities;
-    if (contractedForces != NULL)
-        delete contractedForces;
-    if (contractedPositions != NULL)
-        delete contractedPositions;
-}
-
 void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDIntegrator& integrator) {
    cl.getPlatformData().initializeContexts(system);
    numCopies = integrator.getNumCopies();
@@ -63,32 +50,32 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
        throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
    int paddedParticles = cl.getPaddedNumAtoms();
    int forceElementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
-    forces = new OpenCLArray(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces");
+    forces.initialize(cl, numCopies*paddedParticles, forceElementSize, "rpmdForces");
    bool useDoublePrecision = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision());
    int elementSize = (useDoublePrecision ? sizeof(mm_double4) : sizeof(mm_float4));
-    positions = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdPositions");
-    velocities = new OpenCLArray(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities");
+    positions.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdPositions");
+    velocities.initialize(cl, numCopies*paddedParticles, elementSize, "rpmdVelocities");
    cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
    
    // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
    
    if (useDoublePrecision) {
-        vector<mm_double4> temp(positions->getSize());
-        for (int i = 0; i < positions->getSize(); i++)
+        vector<mm_double4> temp(positions.getSize());
+        for (int i = 0; i < positions.getSize(); i++)
            temp[i] = mm_double4(0, 0, 0, 0);
-        positions->upload(temp);
-        for (int i = 0; i < velocities->getSize(); i++)
+        positions.upload(temp);
+        for (int i = 0; i < velocities.getSize(); i++)
            temp[i] = mm_double4(0, 0, 0, 1);
-        velocities->upload(temp);
+        velocities.upload(temp);
    }
    else {
-        vector<mm_float4> temp(positions->getSize());
-        for (int i = 0; i < positions->getSize(); i++)
+        vector<mm_float4> temp(positions.getSize());
+        for (int i = 0; i < positions.getSize(); i++)
            temp[i] = mm_float4(0, 0, 0, 0);
-        positions->upload(temp);
-        for (int i = 0; i < velocities->getSize(); i++)
+        positions.upload(temp);
+        for (int i = 0; i < velocities.getSize(); i++)
            temp[i] = mm_float4(0, 0, 0, 1);
-        velocities->upload(temp);
+        velocities.upload(temp);
    }
    
    // Build a list of contractions.
@@ -117,8 +104,8 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
        }
    }
    if (maxContractedCopies > 0) {
-        contractedForces = new OpenCLArray(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
-        contractedPositions = new OpenCLArray(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
+        contractedForces.initialize(cl, maxContractedCopies*paddedParticles, forceElementSize, "rpmdContractedForces");
+        contractedPositions.initialize(cl, maxContractedCopies*paddedParticles, elementSize, "rpmdContractedPositions");
    }

    // Create kernels.
@@ -164,30 +151,30 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI

 void OpenCLIntegrateRPMDStepKernel::initializeKernels(ContextImpl& context) {
    hasInitializedKernel = true;
-    pileKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
-    stepKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
-    stepKernel.setArg<cl::Buffer>(1, velocities->getDeviceBuffer());
-    stepKernel.setArg<cl::Buffer>(2, forces->getDeviceBuffer());
-    velocitiesKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
-    velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
-    translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
+    pileKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
+    stepKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
+    stepKernel.setArg<cl::Buffer>(1, velocities.getDeviceBuffer());
+    stepKernel.setArg<cl::Buffer>(2, forces.getDeviceBuffer());
+    velocitiesKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
+    velocitiesKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
+    translateKernel.setArg<cl::Buffer>(0, positions.getDeviceBuffer());
    translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
    translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
-    copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
+    copyToContextKernel.setArg<cl::Buffer>(0, velocities.getDeviceBuffer());
    copyToContextKernel.setArg<cl::Buffer>(1, cl.getVelm().getDeviceBuffer());
    copyToContextKernel.setArg<cl::Buffer>(3, cl.getPosq().getDeviceBuffer());
    copyToContextKernel.setArg<cl::Buffer>(4, cl.getAtomIndexArray().getDeviceBuffer());
    copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
    copyFromContextKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
-    copyFromContextKernel.setArg<cl::Buffer>(3, velocities->getDeviceBuffer());
+    copyFromContextKernel.setArg<cl::Buffer>(3, velocities.getDeviceBuffer());
    copyFromContextKernel.setArg<cl::Buffer>(4, cl.getPosq().getDeviceBuffer());
    copyFromContextKernel.setArg<cl::Buffer>(6, cl.getAtomIndexArray().getDeviceBuffer());
    for (auto& g : groupsByCopies) {
        int copies = g.first;
-        positionContractionKernels[copies].setArg<cl::Buffer>(0, positions->getDeviceBuffer());
-        positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions->getDeviceBuffer());
-        forceContractionKernels[copies].setArg<cl::Buffer>(0, forces->getDeviceBuffer());
-        forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer());
+        positionContractionKernels[copies].setArg<cl::Buffer>(0, positions.getDeviceBuffer());
+        positionContractionKernels[copies].setArg<cl::Buffer>(1, contractedPositions.getDeviceBuffer());
+        forceContractionKernels[copies].setArg<cl::Buffer>(0, forces.getDeviceBuffer());
+        forceContractionKernels[copies].setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
    }
 }

@@ -261,9 +248,9 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
 void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
    // Compute forces from all groups that didn't have a specified contraction.

-    copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
-    copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
-    copyFromContextKernel.setArg<cl::Buffer>(5, positions->getDeviceBuffer());
+    copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
+    copyFromContextKernel.setArg<cl::Buffer>(1, forces.getDeviceBuffer());
+    copyFromContextKernel.setArg<cl::Buffer>(5, positions.getDeviceBuffer());
    for (int i = 0; i < numCopies; i++) {
        copyToContextKernel.setArg<cl_int>(5, i);
        cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
@@ -283,9 +270,9 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
    // Now loop over contractions and compute forces from them.
    
    if (groupsByCopies.size() > 0) {
-        copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions->getDeviceBuffer());
-        copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces->getDeviceBuffer());
-        copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions->getDeviceBuffer());
+        copyToContextKernel.setArg<cl::Buffer>(2, contractedPositions.getDeviceBuffer());
+        copyFromContextKernel.setArg<cl::Buffer>(1, contractedForces.getDeviceBuffer());
+        copyFromContextKernel.setArg<cl::Buffer>(5, contractedPositions.getDeviceBuffer());
        for (auto& g : groupsByCopies) {
            int copies = g.first;
            int groupFlags = g.second;
@@ -313,7 +300,7 @@ void OpenCLIntegrateRPMDStepKernel::computeForces(ContextImpl& context) {
    if (groupsByCopies.size() > 0) {
        // Ensure the Context contains the positions from the last copy, since we'll assume that later.
        
-        copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
+        copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
        copyToContextKernel.setArg<cl_int>(5, numCopies-1);
        cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
    }
@@ -324,7 +311,7 @@ double OpenCLIntegrateRPMDStepKernel::computeKineticEnergy(ContextImpl& context,
 }

 void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& pos) {
-    if (positions == NULL)
+    if (!positions.isInitialized())
        throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
    if (pos.size() != numParticles)
        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
@@ -346,7 +333,7 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
        cl.getPosq().download(posq);
        for (int i = 0; i < numParticles; i++)
            posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posq[i].w);
-        cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
+        cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
    }
    else if (cl.getUseMixedPrecision()) {
        vector<mm_float4> posqf(cl.getPaddedNumAtoms());
@@ -354,19 +341,19 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
        vector<mm_double4> posq(cl.getPaddedNumAtoms());
        for (int i = 0; i < numParticles; i++)
            posq[i] = mm_double4(offsetPos[i][0], offsetPos[i][1], offsetPos[i][2], posqf[i].w);
-        cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
+        cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &posq[0]);
    }
    else {
        vector<mm_float4> posq(cl.getPaddedNumAtoms());
        cl.getPosq().download(posq);
        for (int i = 0; i < numParticles; i++)
            posq[i] = mm_float4((cl_float) offsetPos[i][0], (cl_float) offsetPos[i][1], (cl_float) offsetPos[i][2], posq[i].w);
-        cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
+        cl.getQueue().enqueueWriteBuffer(positions.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
    }
 }

 void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& vel) {
-    if (velocities == NULL)
+    if (!velocities.isInitialized())
        throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
    if (vel.size() != numParticles)
        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
@@ -375,21 +362,21 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
        cl.getVelm().download(velm);
        for (int i = 0; i < numParticles; i++)
            velm[i] = mm_double4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
-        cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]);
+        cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_double4), numParticles*sizeof(mm_double4), &velm[0]);
    }
    else {
        vector<mm_float4> velm(cl.getPaddedNumAtoms());
        cl.getVelm().download(velm);
        for (int i = 0; i < numParticles; i++)
            velm[i] = mm_float4((cl_float) vel[i][0], (cl_float) vel[i][1], (cl_float) vel[i][2], velm[i].w);
-        cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
+        cl.getQueue().enqueueWriteBuffer(velocities.getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
    }
 }

 void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
    if (!hasInitializedKernel)
        initializeKernels(context);
-    copyToContextKernel.setArg<cl::Buffer>(2, positions->getDeviceBuffer());
+    copyToContextKernel.setArg<cl::Buffer>(2, positions.getDeviceBuffer());
    copyToContextKernel.setArg<cl_int>(5, copy);
    cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
 }

--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
@@ -9,7 +9,7 @@
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2011-2013 Stanford University and the Authors.      *
+ * Portions copyright (c) 2011-2018 Stanford University and the Authors.      *
 * Authors: Peter Eastman                                                     *
 * Contributors:                                                              *
 *                                                                            *
@@ -45,9 +45,8 @@ namespace OpenMM {
 class OpenCLIntegrateRPMDStepKernel : public IntegrateRPMDStepKernel {
 public:
    OpenCLIntegrateRPMDStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) :
-            IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false), forces(NULL), positions(NULL), velocities(NULL), contractedForces(NULL), contractedPositions(NULL) {
+            IntegrateRPMDStepKernel(name, platform), cl(cl), hasInitializedKernel(false) {
    }
-    ~OpenCLIntegrateRPMDStepKernel();
    /**
     * Initialize the kernel.
     *
@@ -92,11 +91,11 @@ private:
    int numCopies, numParticles, workgroupSize;
    std::map<int, int> groupsByCopies;
    int groupsNotContracted;
-    OpenCLArray* forces;
-    OpenCLArray* positions;
-    OpenCLArray* velocities;
-    OpenCLArray* contractedForces;
-    OpenCLArray* contractedPositions;
+    OpenCLArray forces;
+    OpenCLArray positions;
+    OpenCLArray velocities;
+    OpenCLArray contractedForces;
+    OpenCLArray contractedPositions;
    cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
    std::map<int, cl::Kernel> positionContractionKernels;
    std::map<int, cl::Kernel> forceContractionKernels;