OpenCLArray is no longer templatized and doesn't provide a host buffer.

OpenCLArray is no longer templatized and doesn't provide a host buffer. This is in preparation for adding mixed/double precision support.

OpenCLArray is no longer templatized and doesn't provide a host buffer.
OpenCLArray is no longer templatized and doesn't provide a host buffer. This is in preparation for adding mixed/double precision support.
1107aa83 · Peter Eastman · 5980100d · 1107aa83 · 1107aa83 · 1107aa83
Commit 1107aa83 authored Oct 10, 2012 by Peter Eastman
8 changed files
--- a/platforms/opencl/src/OpenCLParallelKernels.h
+++ b/platforms/opencl/src/OpenCLParallelKernels.h
@@ -81,7 +81,7 @@ private:
    std::vector<Kernel> kernels;
    std::vector<long long> completionTimes;
    std::vector<int> contextTiles;
-    OpenCLArray<mm_float4>* contextForces;
+    OpenCLArray* contextForces;
    cl::Buffer* pinnedPositionBuffer;
    cl::Buffer* pinnedForceBuffer;
    mm_float4* pinnedPositionMemory;

--- a/platforms/opencl/src/OpenCLSort.h
+++ b/platforms/opencl/src/OpenCLSort.h
@@ -123,11 +123,11 @@ public:

        // Create workspace arrays.

-        dataRange = new OpenCLArray<typename TRAIT::KeyType>(context, 2, "sortDataRange");
-        bucketOffset = new OpenCLArray<cl_uint>(context, numBuckets, "bucketOffset");
-        bucketOfElement = new OpenCLArray<cl_uint>(context, length, "bucketOfElement");
-        offsetInBucket = new OpenCLArray<cl_uint>(context, length, "offsetInBucket");
-        buckets = new OpenCLArray<typename TRAIT::DataType>(context, length, "buckets");
+        dataRange = OpenCLArray::create<typename TRAIT::KeyType>(context, 2, "sortDataRange");
+        bucketOffset = OpenCLArray::create<cl_uint>(context, numBuckets, "bucketOffset");
+        bucketOfElement = OpenCLArray::create<cl_uint>(context, length, "bucketOfElement");
+        offsetInBucket = OpenCLArray::create<cl_uint>(context, length, "offsetInBucket");
+        buckets = OpenCLArray::create<typename TRAIT::DataType>(context, length, "buckets");
    }
    ~OpenCLSort() {
        if (dataRange != NULL)
@@ -144,7 +144,7 @@ public:
    /**
     * Sort an array.
     */
-    void sort(OpenCLArray<typename TRAIT::DataType>& data) {
+    void sort(OpenCLArray& data) {

        if (data.getSize() != bucketOfElement->getSize())
            throw OpenMMException("OpenCLSort called with different data size");
@@ -200,11 +200,11 @@ public:
    }
 private:
    OpenCLContext& context;
-    OpenCLArray<typename TRAIT::KeyType>* dataRange;
-    OpenCLArray<cl_uint>* bucketOfElement;
-    OpenCLArray<cl_uint>* offsetInBucket;
-    OpenCLArray<cl_uint>* bucketOffset;
-    OpenCLArray<typename TRAIT::DataType>* buckets;
+    OpenCLArray* dataRange;
+    OpenCLArray* bucketOfElement;
+    OpenCLArray* offsetInBucket;
+    OpenCLArray* bucketOffset;
+    OpenCLArray* buckets;
    cl::Kernel computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel;
    unsigned int rangeKernelSize, positionsKernelSize, sortKernelSize;
 };

--- a/platforms/opencl/tests/TestOpenCLFFT.cpp
+++ b/platforms/opencl/tests/TestOpenCLFFT.cpp
@@ -64,8 +64,8 @@ void testTransform() {
        original[i] = value;
        reference[i] = t_complex(value.x, value.y);
    }
-    OpenCLArray<mm_float2> grid1(context, original.size(), "grid1");
-    OpenCLArray<mm_float2> grid2(context, original.size(), "grid2");
+    OpenCLArray grid1(context, original.size(), sizeof(mm_float2), "grid1");
+    OpenCLArray grid2(context, original.size(), sizeof(mm_float2), "grid2");
    grid1.upload(original);
    OpenCLFFT3D fft(context, xsize, ysize, zsize);


--- a/platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
+++ b/platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
@@ -432,9 +432,12 @@ void testLargeSystem() {
    clState = clContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
    referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
    for (int i = 0; i < numParticles; i++) {
-        ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);
-        ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
-        ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
+        double dx = clState.getPositions()[i][0]-referenceState.getPositions()[i][0];
+        double dy = clState.getPositions()[i][1]-referenceState.getPositions()[i][1];
+        double dz = clState.getPositions()[i][2]-referenceState.getPositions()[i][2];
+        ASSERT_EQUAL_TOL(dx-floor(dx/boxSize+0.5)*boxSize, 0, tol);
+        ASSERT_EQUAL_TOL(dy-floor(dy/boxSize+0.5)*boxSize, 0, tol);
+        ASSERT_EQUAL_TOL(dz-floor(dz/boxSize+0.5)*boxSize, 0, tol);
        ASSERT_EQUAL_VEC(clState.getVelocities()[i], referenceState.getVelocities()[i], tol);
        ASSERT_EQUAL_VEC(clState.getForces()[i], referenceState.getForces()[i], tol);
    }
@@ -476,7 +479,8 @@ void testBlockInteractions(bool periodic) {

    // Verify that the bounds of each block were calculated correctly.

-    clcontext.getPosq().download();
+    vector<mm_float4> posq;
+    clcontext.getPosq().download(posq);
    vector<mm_float4> blockCenters(numBlocks);
    vector<mm_float4> blockBoundingBoxes(numBlocks);
    nb.getBlockCenters().download(blockCenters);
@@ -491,7 +495,7 @@ void testBlockInteractions(bool periodic) {
        }
        float minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0;
        for (int j = 0; j < blockSize; j++) {
-            mm_float4 pos = clcontext.getPosq()[i*blockSize+j];
+            mm_float4 pos = posq[i*blockSize+j];
            float dx = pos.x-center.x;
            float dy = pos.y-center.y;
            float dz = pos.z-center.z;
@@ -563,9 +567,9 @@ void testBlockInteractions(bool periodic) {
            unsigned int flags = interactionFlags[i];
            for (int atom2 = 0; atom2 < 32; atom2++) {
                if ((flags & 1) == 0) {
-                    mm_float4 pos2 = clcontext.getPosq()[y*blockSize+atom2];
+                    mm_float4 pos2 = posq[y*blockSize+atom2];
                    for (int atom1 = 0; atom1 < blockSize; ++atom1) {
-                        mm_float4 pos1 = clcontext.getPosq()[x*blockSize+atom1];
+                        mm_float4 pos1 = posq[x*blockSize+atom1];
                        float dx = pos2.x-pos1.x;
                        float dy = pos2.y-pos1.y;
                        float dz = pos2.z-pos1.z;
@@ -589,9 +593,9 @@ void testBlockInteractions(bool periodic) {
            unsigned int y = (unsigned int) std::floor(numBlocks+0.5-std::sqrt((numBlocks+0.5)*(numBlocks+0.5)-2*i));
            unsigned int x = (i-y*numBlocks+y*(y+1)/2);
            for (int atom1 = 0; atom1 < blockSize; ++atom1) {
-                mm_float4 pos1 = clcontext.getPosq()[x*blockSize+atom1];
+                mm_float4 pos1 = posq[x*blockSize+atom1];
                for (int atom2 = 0; atom2 < blockSize; ++atom2) {
-                    mm_float4 pos2 = clcontext.getPosq()[y*blockSize+atom2];
+                    mm_float4 pos2 = posq[y*blockSize+atom2];
                    float dx = pos1.x-pos2.x;
                    float dy = pos1.y-pos2.y;
                    float dz = pos1.z-pos2.z;

--- a/platforms/opencl/tests/TestOpenCLRandom.cpp
+++ b/platforms/opencl/tests/TestOpenCLRandom.cpp
@@ -52,7 +52,7 @@ void testGaussian() {
    OpenCLContext& context = *platformData.contexts[0];
    context.initialize();
    context.getIntegrationUtilities().initRandomNumberGenerator(0);
-    OpenCLArray<mm_float4>& random = context.getIntegrationUtilities().getRandom();
+    OpenCLArray& random = context.getIntegrationUtilities().getRandom();
    context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
    const int numValues = random.getSize()*4;
    vector<mm_float4> values(numValues);

--- a/platforms/opencl/tests/TestOpenCLSort.cpp
+++ b/platforms/opencl/tests/TestOpenCLSort.cpp
@@ -65,7 +65,7 @@ void verifySorting(vector<float> array) {
    OpenCLPlatform::PlatformData platformData(system, "", "");
    OpenCLContext& context = *platformData.contexts[0];
    context.initialize();
-    OpenCLArray<float> data(context, array.size(), "sortData");
+    OpenCLArray data(context, array.size(), sizeof(float), "sortData");
    data.upload(array);
    OpenCLSort<SortTrait> sort(context, array.size());
    sort.sort(data);

--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.cpp
@@ -59,9 +59,9 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
    if (numCopies != OpenCLFFT3D::findLegalDimension(numCopies))
        throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
    int paddedParticles = cl.getPaddedNumAtoms();
-    forces = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdForces");
-    positions = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdPositions");
-    velocities = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdVelocities");
+    forces = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdForces");
+    positions = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdPositions");
+    velocities = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdVelocities");
    cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
    
    // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
@@ -119,17 +119,17 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
        velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
        translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
        translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
-        translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
+        translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
    }
    
    // Loop over copies and compute the force on each one.
    
    copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
    copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
-    copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
+    copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
    copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
    copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
-    copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
+    copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
    if (!forcesAreValid)
        computeForces(context);
    
@@ -190,9 +190,10 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
        throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
    if (pos.size() != numParticles)
        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
-    vector<mm_float4> posq(numParticles);
+    vector<mm_float4> posq(cl.getPaddedNumAtoms());
+    cl.getPosq().download(posq);
    for (int i = 0; i < numParticles; i++)
-        posq[i] = mm_float4(pos[i][0], pos[i][1], pos[i][2], cl.getPosq()[i].w);
+        posq[i] = mm_float4(pos[i][0], pos[i][1], pos[i][2], posq[i].w);
    cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
 }

@@ -201,16 +202,17 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
        throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
    if (vel.size() != numParticles)
        throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
-    vector<mm_float4> velm(numParticles);
+    vector<mm_float4> velm(cl.getPaddedNumAtoms());
+    cl.getVelm().download(velm);
    for (int i = 0; i < numParticles; i++)
-        velm[i] = mm_float4(vel[i][0], vel[i][1], vel[i][2], cl.getVelm()[i].w);
+        velm[i] = mm_float4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
    cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
 }

 void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
    copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
    copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
-    copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
+    copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
    copyToContextKernel.setArg<cl_int>(3, copy);
    cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
    copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());

--- a/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
+++ b/plugins/rpmd/platforms/opencl/src/OpenCLRpmdKernels.h
@@ -82,9 +82,9 @@ private:
    OpenCLContext& cl;
    bool hasInitializedKernel;
    int numCopies, numParticles, workgroupSize;
-    OpenCLArray<mm_float4>* forces;
-    OpenCLArray<mm_float4>* positions;
-    OpenCLArray<mm_float4>* velocities;
+    OpenCLArray* forces;
+    OpenCLArray* positions;
+    OpenCLArray* velocities;
    cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
 };