Commit 1107aa83 authored by Peter Eastman
Browse files

OpenCLArray is no longer templatized and doesn't provide a host buffer. This...

OpenCLArray is no longer templatized and doesn't provide a host buffer.  This is in preparation for adding mixed/double precision support.
parent 5980100d
......@@ -81,7 +81,7 @@ private:
std::vector<Kernel> kernels;
std::vector<long long> completionTimes;
std::vector<int> contextTiles;
OpenCLArray<mm_float4>* contextForces;
OpenCLArray* contextForces;
cl::Buffer* pinnedPositionBuffer;
cl::Buffer* pinnedForceBuffer;
mm_float4* pinnedPositionMemory;
......
......@@ -123,11 +123,11 @@ public:
// Create workspace arrays.
dataRange = new OpenCLArray<typename TRAIT::KeyType>(context, 2, "sortDataRange");
bucketOffset = new OpenCLArray<cl_uint>(context, numBuckets, "bucketOffset");
bucketOfElement = new OpenCLArray<cl_uint>(context, length, "bucketOfElement");
offsetInBucket = new OpenCLArray<cl_uint>(context, length, "offsetInBucket");
buckets = new OpenCLArray<typename TRAIT::DataType>(context, length, "buckets");
dataRange = OpenCLArray::create<typename TRAIT::KeyType>(context, 2, "sortDataRange");
bucketOffset = OpenCLArray::create<cl_uint>(context, numBuckets, "bucketOffset");
bucketOfElement = OpenCLArray::create<cl_uint>(context, length, "bucketOfElement");
offsetInBucket = OpenCLArray::create<cl_uint>(context, length, "offsetInBucket");
buckets = OpenCLArray::create<typename TRAIT::DataType>(context, length, "buckets");
}
~OpenCLSort() {
if (dataRange != NULL)
......@@ -144,7 +144,7 @@ public:
/**
* Sort an array.
*/
void sort(OpenCLArray<typename TRAIT::DataType>& data) {
void sort(OpenCLArray& data) {
if (data.getSize() != bucketOfElement->getSize())
throw OpenMMException("OpenCLSort called with different data size");
......@@ -200,11 +200,11 @@ public:
}
private:
OpenCLContext& context;
OpenCLArray<typename TRAIT::KeyType>* dataRange;
OpenCLArray<cl_uint>* bucketOfElement;
OpenCLArray<cl_uint>* offsetInBucket;
OpenCLArray<cl_uint>* bucketOffset;
OpenCLArray<typename TRAIT::DataType>* buckets;
OpenCLArray* dataRange;
OpenCLArray* bucketOfElement;
OpenCLArray* offsetInBucket;
OpenCLArray* bucketOffset;
OpenCLArray* buckets;
cl::Kernel computeRangeKernel, assignElementsKernel, computeBucketPositionsKernel, copyToBucketsKernel, sortBucketsKernel;
unsigned int rangeKernelSize, positionsKernelSize, sortKernelSize;
};
......
......@@ -64,8 +64,8 @@ void testTransform() {
original[i] = value;
reference[i] = t_complex(value.x, value.y);
}
OpenCLArray<mm_float2> grid1(context, original.size(), "grid1");
OpenCLArray<mm_float2> grid2(context, original.size(), "grid2");
OpenCLArray grid1(context, original.size(), sizeof(mm_float2), "grid1");
OpenCLArray grid2(context, original.size(), sizeof(mm_float2), "grid2");
grid1.upload(original);
OpenCLFFT3D fft(context, xsize, ysize, zsize);
......
......@@ -432,9 +432,12 @@ void testLargeSystem() {
clState = clContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(clState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
double dx = clState.getPositions()[i][0]-referenceState.getPositions()[i][0];
double dy = clState.getPositions()[i][1]-referenceState.getPositions()[i][1];
double dz = clState.getPositions()[i][2]-referenceState.getPositions()[i][2];
ASSERT_EQUAL_TOL(dx-floor(dx/boxSize+0.5)*boxSize, 0, tol);
ASSERT_EQUAL_TOL(dy-floor(dy/boxSize+0.5)*boxSize, 0, tol);
ASSERT_EQUAL_TOL(dz-floor(dz/boxSize+0.5)*boxSize, 0, tol);
ASSERT_EQUAL_VEC(clState.getVelocities()[i], referenceState.getVelocities()[i], tol);
ASSERT_EQUAL_VEC(clState.getForces()[i], referenceState.getForces()[i], tol);
}
......@@ -476,7 +479,8 @@ void testBlockInteractions(bool periodic) {
// Verify that the bounds of each block were calculated correctly.
clcontext.getPosq().download();
vector<mm_float4> posq;
clcontext.getPosq().download(posq);
vector<mm_float4> blockCenters(numBlocks);
vector<mm_float4> blockBoundingBoxes(numBlocks);
nb.getBlockCenters().download(blockCenters);
......@@ -491,7 +495,7 @@ void testBlockInteractions(bool periodic) {
}
float minx = 0.0, maxx = 0.0, miny = 0.0, maxy = 0.0, minz = 0.0, maxz = 0.0, radius = 0.0;
for (int j = 0; j < blockSize; j++) {
mm_float4 pos = clcontext.getPosq()[i*blockSize+j];
mm_float4 pos = posq[i*blockSize+j];
float dx = pos.x-center.x;
float dy = pos.y-center.y;
float dz = pos.z-center.z;
......@@ -563,9 +567,9 @@ void testBlockInteractions(bool periodic) {
unsigned int flags = interactionFlags[i];
for (int atom2 = 0; atom2 < 32; atom2++) {
if ((flags & 1) == 0) {
mm_float4 pos2 = clcontext.getPosq()[y*blockSize+atom2];
mm_float4 pos2 = posq[y*blockSize+atom2];
for (int atom1 = 0; atom1 < blockSize; ++atom1) {
mm_float4 pos1 = clcontext.getPosq()[x*blockSize+atom1];
mm_float4 pos1 = posq[x*blockSize+atom1];
float dx = pos2.x-pos1.x;
float dy = pos2.y-pos1.y;
float dz = pos2.z-pos1.z;
......@@ -589,9 +593,9 @@ void testBlockInteractions(bool periodic) {
unsigned int y = (unsigned int) std::floor(numBlocks+0.5-std::sqrt((numBlocks+0.5)*(numBlocks+0.5)-2*i));
unsigned int x = (i-y*numBlocks+y*(y+1)/2);
for (int atom1 = 0; atom1 < blockSize; ++atom1) {
mm_float4 pos1 = clcontext.getPosq()[x*blockSize+atom1];
mm_float4 pos1 = posq[x*blockSize+atom1];
for (int atom2 = 0; atom2 < blockSize; ++atom2) {
mm_float4 pos2 = clcontext.getPosq()[y*blockSize+atom2];
mm_float4 pos2 = posq[y*blockSize+atom2];
float dx = pos1.x-pos2.x;
float dy = pos1.y-pos2.y;
float dz = pos1.z-pos2.z;
......
......@@ -52,7 +52,7 @@ void testGaussian() {
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
OpenCLArray<mm_float4>& random = context.getIntegrationUtilities().getRandom();
OpenCLArray& random = context.getIntegrationUtilities().getRandom();
context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
const int numValues = random.getSize()*4;
vector<mm_float4> values(numValues);
......
......@@ -65,7 +65,7 @@ void verifySorting(vector<float> array) {
OpenCLPlatform::PlatformData platformData(system, "", "");
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
OpenCLArray<float> data(context, array.size(), "sortData");
OpenCLArray data(context, array.size(), sizeof(float), "sortData");
data.upload(array);
OpenCLSort<SortTrait> sort(context, array.size());
sort.sort(data);
......
......@@ -59,9 +59,9 @@ void OpenCLIntegrateRPMDStepKernel::initialize(const System& system, const RPMDI
if (numCopies != OpenCLFFT3D::findLegalDimension(numCopies))
throw OpenMMException("RPMDIntegrator: the number of copies must be a multiple of powers of 2, 3, and 5.");
int paddedParticles = cl.getPaddedNumAtoms();
forces = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdForces");
positions = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdPositions");
velocities = new OpenCLArray<mm_float4>(cl, numCopies*paddedParticles, "rpmdVelocities");
forces = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdForces");
positions = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdPositions");
velocities = OpenCLArray::create<mm_float4>(cl, numCopies*paddedParticles, "rpmdVelocities");
cl.getIntegrationUtilities().initRandomNumberGenerator((unsigned int) integrator.getRandomNumberSeed());
// Fill in the posq and velm arrays with safe values to avoid a risk of nans.
......@@ -119,17 +119,17 @@ void OpenCLIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDInte
velocitiesKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
translateKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
}
// Loop over copies and compute the force on each one.
copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(1, forces->getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
copyFromContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
if (!forcesAreValid)
computeForces(context);
......@@ -190,9 +190,10 @@ void OpenCLIntegrateRPMDStepKernel::setPositions(int copy, const vector<Vec3>& p
throw OpenMMException("RPMDIntegrator: Cannot set positions before the integrator is added to a Context");
if (pos.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setPositions()");
vector<mm_float4> posq(numParticles);
vector<mm_float4> posq(cl.getPaddedNumAtoms());
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; i++)
posq[i] = mm_float4(pos[i][0], pos[i][1], pos[i][2], cl.getPosq()[i].w);
posq[i] = mm_float4(pos[i][0], pos[i][1], pos[i][2], posq[i].w);
cl.getQueue().enqueueWriteBuffer(positions->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &posq[0]);
}
......@@ -201,16 +202,17 @@ void OpenCLIntegrateRPMDStepKernel::setVelocities(int copy, const vector<Vec3>&
throw OpenMMException("RPMDIntegrator: Cannot set velocities before the integrator is added to a Context");
if (vel.size() != numParticles)
throw OpenMMException("RPMDIntegrator: wrong number of values passed to setVelocities()");
vector<mm_float4> velm(numParticles);
vector<mm_float4> velm(cl.getPaddedNumAtoms());
cl.getVelm().download(velm);
for (int i = 0; i < numParticles; i++)
velm[i] = mm_float4(vel[i][0], vel[i][1], vel[i][2], cl.getVelm()[i].w);
velm[i] = mm_float4(vel[i][0], vel[i][1], vel[i][2], velm[i].w);
cl.getQueue().enqueueWriteBuffer(velocities->getDeviceBuffer(), CL_TRUE, copy*cl.getPaddedNumAtoms()*sizeof(mm_float4), numParticles*sizeof(mm_float4), &velm[0]);
}
void OpenCLIntegrateRPMDStepKernel::copyToContext(int copy, ContextImpl& context) {
copyToContextKernel.setArg<cl::Buffer>(0, positions->getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndex().getDeviceBuffer());
copyToContextKernel.setArg<cl::Buffer>(2, cl.getAtomIndexArray().getDeviceBuffer());
copyToContextKernel.setArg<cl_int>(3, copy);
cl.executeKernel(copyToContextKernel, cl.getNumAtoms());
copyToContextKernel.setArg<cl::Buffer>(0, velocities->getDeviceBuffer());
......
......@@ -82,9 +82,9 @@ private:
OpenCLContext& cl;
bool hasInitializedKernel;
int numCopies, numParticles, workgroupSize;
OpenCLArray<mm_float4>* forces;
OpenCLArray<mm_float4>* positions;
OpenCLArray<mm_float4>* velocities;
OpenCLArray* forces;
OpenCLArray* positions;
OpenCLArray* velocities;
cl::Kernel pileKernel, stepKernel, velocitiesKernel, copyToContextKernel, copyFromContextKernel, translateKernel;
};
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment