Commit 1107aa83 authored by Peter Eastman
Browse files

OpenCLArray is no longer templatized and doesn't provide a host buffer

OpenCLArray is no longer templatized and doesn't provide a host buffer.  This is in preparation for adding mixed/double precision support.
parent 5980100d
...@@ -81,7 +81,7 @@ private: ...@@ -81,7 +81,7 @@ private:
std::vector<Kernel> kernels; std::vector<Kernel> kernels;
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<int> contextTiles; std::vector<int> contextTiles;
OpenCLArray<mm_float4>* contextForces; OpenCLArray* contextForces;
cl::Buffer* pinnedPositionBuffer; cl::Buffer* pinnedPositionBuffer;
cl::Buffer* pinnedForceBuffer; cl::Buffer* pinnedForceBuffer;
mm_float4* pinnedPositionMemory; mm_float4* pinnedPositionMemory;
......
...@@ -123,11 +123,11 @@ public: ...@@ -123,11 +123,11 @@ public:
// Create workspace arrays. // Create workspace arrays.
dataRange = new OpenCLArray<typename TRAIT::KeyType>(context, 2, "sortDataRange"); dataRange = OpenCLArray::create<typename TRAIT::KeyType>(context, 2, "sortDataRange");
bucketOffset = new OpenCLArray<cl_uint>(context, numBuckets, "bucketOffset"); bucketOffset = OpenCLArray::create<cl_uint>(context, numBuckets, "bucketOffset");
bucketOfElement = new OpenCLArray<cl_uint>(context, length, "bucketOfElement"); bucketOfElement = OpenCLArray::create<cl_uint>(context, length, "bucketOfElement");
offsetInBucket = new OpenCLArray<cl_uint>(context, length, "offsetInBucket"); offsetInBucket = OpenCLArray::create<cl_uint>(context, length, "offsetInBucket");
buckets = new OpenCLArray<typename TRAIT::DataType>(context, length, "buckets"); buckets = OpenCLArray::create<typename TRAIT::DataType>(context, length, "buckets");
} }
~OpenCLSort() { ~OpenCLSort() {
if (dataRange != NULL) if (dataRange != NULL)
...@@ -144,7 +144,7 @@ public: ...@@ -144,7 +144,7 @@ public:
/** /**
* Sort an array. * Sort an array.
*/ */
void sort(OpenCLArray<typename TRAIT::DataType>& data) { void sort(OpenCLArray& data) {
if (data.getSize() != bucketOfElement->getSize()) if (data.getSize() != bucketOfElement->getSize())
throw OpenMMException("OpenCLSort called with different data size"); throw OpenMMException("OpenCLSort called with different data size");
...@@ -200,11 +200,11 @@ public: ...@@ -200,11 +200,11 @@ public:
} }
private: private:
OpenCLContext& context; OpenCLContext& context;
OpenCLArray<typename TRAIT::KeyType>* dataRange; OpenCLArray* dataRange;
OpenCLArray<cl_uint>* bucketOfElement; OpenCLArray* bucketOfElement;
OpenCLArray<cl_uint>* offsetInBucket; OpenCLArray* offsetInBucket;
OpenCLArray<cl_uint>* bucketOffset; OpenCLArray* bucketOffset;
OpenCLArray<typename TRAIT::DataType>* buckets; OpenCLArray* buckets;
cl::Kernel computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel; cl::Kernel computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel;
unsigned int rangeKernelSize, positionsKernelSize, sortKernelSize; unsigned int rangeKernelSize, positionsKernelSize, sortKernelSize;
}; };
......
...@@ -64,8 +64,8 @@ void testTransform() { ...@@ -64,8 +64,8 @@ void testTransform() {
original[i] = value; original[i] = value;
reference[i] = t_complex(value.x, value.y); reference[i] = t_complex(value.x, value.y);
} }
OpenCLArray<mm_float2> grid1(context, original.size(), "grid1"); OpenCLArray grid1(context, original.size(), sizeof(mm_float2), "grid1");
OpenCLArray<mm_float2> grid2(context, original.size(), "grid2"); OpenCLArray grid2(context, original.size(), sizeof(mm_float2), "grid2");
grid1.upload(original); grid1.upload(original);
OpenCLFFT3D fft(context, xsize, ysize, zsize); OpenCLFFT3D fft(context, xsize, ysize, zsize);
......
...@@ -432,9 +432,12 @@ void testLargeSystem() { ...@@ -432,9 +432,12 @@ void testLargeSystem() {
clState = clContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy); clState = clContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy); referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol); double dx = clState.getPositions()[i][0]-referenceState.getPositions()[i][0];
ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol); double dy = clState.getPositions()[i][1]-referenceState.getPositions()[i][1];
ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol); double dz = clState.getPositions()[i][2]-referenceState.getPositions()[i][2];
ASSERT_EQUAL_TOL(dx-floor(dx/boxSize+0.5)*boxSize, 0, tol);
ASSERT_EQUAL_TOL(dy-floor(dy/boxSize+0.5)*boxSize, 0, tol);
ASSERT_EQUAL_TOL(dz-floor(dz/boxSize+0.5)*boxSize, 0, tol);
ASSERT_EQUAL_VEC(clState.getVelocities()[i], referenceState.getVelocities()[i], tol); ASSERT_EQUAL_VEC(clState.getVelocities()[i], referenceState.getVelocities()[i], tol);
ASSERT_EQUAL_VEC(clState.getForces()[i], referenceState.getForces()[i], tol); ASSERT_EQUAL_VEC(clState.getForces()[i], referenceState.getForces()[i], tol);
} }
...@@ -476,7 +479,8 @@ void testBlockInteractions(bool periodic) { ...@@ -476,7 +479,8 @@ void testBlockInteractions(bool periodic) {
// Verify that the bounds of each block were calculated correctly. // Verify that the bounds of each block were calculated correctly.
clcontext.getPosq().download(); vector<mm_float4> posq;
clcontext.getPosq().download(posq);
vector<mm_float4> blockCenters(numBlocks); vector<mm_float4> blockCenters(numBlocks);
vector<mm_float4> blockBoundingBoxes(numBlocks); vector<mm_float4> blockBoundingBoxes(numBlocks);
nb.getBlockCenters().download(blockCenters); nb.getBlockCenters().download(blockCenters);
...@@ -491,7 +495,7 @@ void testBlockInteractions(bool periodic) { ...@@ -491,7 +495,7 @@ void testBlockInteractions(bool periodic) {
} }
float minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0; float minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0;
for (int j = 0; j < blockSize; j++) { for (int j = 0; j < blockSize; j++) {
mm_float4 pos = clcontext.getPosq()[i*blockSize+j]; mm_float4 pos = posq[i*blockSize+j];
float dx = pos.x-center.x; float dx = pos.x-center.x;
float dy = pos.y-center.y; float dy = pos.y-center.y;
float dz = pos.z-center.z; float dz = pos.z-center.z;
...@@ -563,9 +567,9 @@ void testBlockInteractions(bool periodic) { ...@@ -563,9 +567,9 @@ void testBlockInteractions(bool periodic) {
unsigned int flags = interactionFlags[i]; unsigned int flags = interactionFlags[i];
for (int atom2 = 0; atom2 < 32; atom2++) { for (int atom2 = 0; atom2 < 32; atom2++) {
if ((flags & 1) == 0) { if ((flags & 1) == 0) {
mm_float4 pos2 = clcontext.getPosq()[y*blockSize+atom2]; mm_float4 pos2 = posq[y*blockSize+atom2];
for (int atom1 = 0; atom1 < blockSize; ++atom1) { for (int atom1 = 0; atom1 < blockSize; ++atom1) {
mm_float4 pos1 = clcontext.getPosq()[x*blockSize+atom1]; mm_float4 pos1 = posq[x*blockSize+atom1];
float dx = pos2.x-pos1.x; float dx = pos2.x-pos1.x;
float dy = pos2.y-pos1.y; float dy = pos2.y-pos1.y;
float dz = pos2.z-pos1.z; float dz = pos2.z-pos1.z;
...@@ -589,9 +593,9 @@ void testBlockInteractions(bool periodic) { ...@@ -589,9 +593,9 @@ void testBlockInteractions(bool periodic) {
unsigned int y = (unsigned int) std::floor(numBlocks+0.5-std::sqrt((numBlocks+0.5)*(numBlocks+0.5)-2*i)); unsigned int y = (unsigned int) std::floor(numBlocks+0.5-std::sqrt((numBlocks+0.5)*(numBlocks+0.5)-2*i));
unsigned int x = (i-y*numBlocks+y*(y+1)/2); unsigned int x = (i-y*numBlocks+y*(y+1)/2);
for (int atom1 = 0; atom1 < blockSize; ++atom1) { for (int atom1 = 0; atom1 < blockSize; ++atom1) {
mm_float4 pos1 = clcontext.getPosq()[x*blockSize+atom1]; mm_float4 pos1 = posq[x*blockSize+atom1];
for (int atom2 = 0; atom2 < blockSize; ++atom2) { for (int atom2 = 0; atom2 < blockSize; ++atom2) {
mm_float4 pos2 = clcontext.getPosq()[y*blockSize+atom2]; mm_float4 pos2 = posq[y*blockSize+atom2];
float dx = pos1.x-pos2.x; float dx = pos1.x-pos2.x;
float dy = pos1.y-pos2.y; float dy = pos1.y-pos2.y;
float dz = pos1.z-pos2.z; float dz = pos1.z-pos2.z;
......
...@@ -52,7 +52,7 @@ void testGaussian() { ...@@ -52,7 +52,7 @@ void testGaussian() {
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
OpenCLArray<mm_float4>& random = context.getIntegrationUtilities().getRandom(); OpenCLArray& random = context.getIntegrationUtilities().getRandom();
context.getIntegrationUtilities().prepareRandomNumbers(random.getSize()); context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
const int numValues = random.getSize()*4; const int numValues = random.getSize()*4;
vector<mm_float4> values(numValues); vector<mm_float4> values(numValues);
......
...@@ -65,7 +65,7 @@ void verifySorting(vector<float> array) { ...@@ -65,7 +65,7 @@ void verifySorting(vector<float> array) {
OpenCLPlatform::PlatformData platformData(system, "", ""); OpenCLPlatform::PlatformData platformData(system, "", "");
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenCLArray<float> data(context, array.size(), "sortData"); OpenCLArray data(context, array.size(), sizeof(float), "sortData");
data.upload(array); data.upload(array);
OpenCLSort<SortTrait> sort(context, array.size()); OpenCLSort<SortTrait> sort(context, array.size());
sort.sort(data); sort.sort(data);
......
...@@ -59,9 +59,9 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI ...@@ -59,9 +59,9 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
if (numCopies != OpenCLFFT3D::findLegalDimension(numCopies)) if (numCopies != OpenCLFFT3D::findLegalDimension(numCopies))
throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5."); throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
int paddedParticles = cl.getPaddedNumAtoms(); int paddedParticles = cl.getPaddedNumAtoms();
forces = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdForces"); forces = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdForces");
positions = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdPositions"); positions = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdPositions");
velocities = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdVelocities"); velocities = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdVelocities");
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans. // Fill in the posq and velm arrays with safe values to avoid a risk of nans.
...@@ -119,17 +119,17 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte ...@@ -119,17 +119,17 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer()); velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer()); translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
} }
// Loop over copies and compute the force on each one. // Loop over copies and compute the force on each one.
copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer()); copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
if (!forcesAreValid) if (!forcesAreValid)
computeForces(context); computeForces(context);
...@@ -190,9 +190,10 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p ...@@ -190,9 +190,10 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles) if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()"); throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
vector<mm_float4> posq(numParticles); vector<mm_float4> posq(cl.getPaddedNumAtoms());
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
posq[i] = mm_float4(pos[i][0], pos[i][1], pos[i][2], cl.getPosq()[i].w); posq[i] = mm_float4(pos[i][0], pos[i][1], pos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]); cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
} }
...@@ -201,16 +202,17 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>& ...@@ -201,16 +202,17 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context"); throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles) if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()"); throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
vector<mm_float4> velm(numParticles); vector<mm_float4> velm(cl.getPaddedNumAtoms());
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
velm[i] = mm_float4(vel[i][0], vel[i][1], vel[i][2], cl.getVelm()[i].w); velm[i] = mm_float4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]); cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
} }
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) { void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(3, copy); copyToContextKernel.setArg<cl_int>(3, copy);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms()); cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer()); copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
......
...@@ -82,9 +82,9 @@ private: ...@@ -82,9 +82,9 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernel; bool hasInitializedKernel;
int numCopies, numParticles, workgroupSize; int numCopies, numParticles, workgroupSize;
OpenCLArray<mm_float4>* forces; OpenCLArray* forces;
OpenCLArray<mm_float4>* positions; OpenCLArray* positions;
OpenCLArray<mm_float4>* velocities; OpenCLArray* velocities;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel; cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
}; };
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment