Commit 1107aa83 authored by Peter Eastman
Browse files

OpenCLArray is no longer templatized and doesn't provide a host buffer. This...

OpenCLArray is no longer templatized and doesn't provide a host buffer.  This is in preparation for adding mixed/double precision support.
parent 5980100d
......@@ -45,7 +45,7 @@ using namespace std;
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), cutoff(-1.0), useCutoff(false), anyExclusions(false),
exclusionIndices(NULL), exclusionRowIndices(NULL), exclusions(NULL), interactingTiles(NULL), interactionFlags(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), pinnedInteractionCount(NULL), nonbondedForceGroup(0) {
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), nonbondedForceGroup(0) {
// Decide how many thread blocks to use.
string errorMessage = "Error initializing nonbonded utilities";
......@@ -72,8 +72,6 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (pinnedInteractionCount != NULL)
cuMemFreeHost(pinnedInteractionCount);
}
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
......@@ -240,9 +238,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
blockCenter = CudaArray::create<float4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = CudaArray::create<float4>(context, numAtomBlocks, "blockBoundingBox");
}
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedInteractionCount, sizeof(unsigned int), 0));
pinnedInteractionCount[0] = 0;
interactionCount->upload(pinnedInteractionCount);
vector<unsigned int> count(1, 0);
interactionCount->upload(count);
}
// Create kernels.
......@@ -325,6 +322,7 @@ void CudaNonbondedUtilities::computeInteractions() {
void CudaNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return;
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer();
interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) maxTiles)
return;
......
......@@ -259,7 +259,6 @@ private:
CudaArray* interactionCount;
CudaArray* blockCenter;
CudaArray* blockBoundingBox;
unsigned int* pinnedInteractionCount;
std::vector<void*> forceArgs, findBlockBoundsArgs, findInteractingBlocksArgs, findInteractionsWithinBlocksArgs;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
......
......@@ -432,6 +432,9 @@ void testLargeSystem() {
cuState = cuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) {
double dx = cuState.getPositions()[i][0]-referenceState.getPositions()[i][0];
double dy = cuState.getPositions()[i][1]-referenceState.getPositions()[i][1];
double dz = cuState.getPositions()[i][2]-referenceState.getPositions()[i][2];
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLArray.h"
#include <cstddef>
#include <iostream>
#include <sstream>
#include <vector>
using namespace OpenMM;
/**
 * Create an OpenCLArray backed by a newly created OpenCL Buffer. The array is
 * marked as owning the Buffer.
 *
 * @param context      the context for which to create the array
 * @param size         the number of elements in the array
 * @param elementSize  the size of each element in bytes
 * @param name         the name of the array (reported in error messages)
 * @param flags        the set of flags to specify when creating the OpenCL Buffer
 * @throws OpenMMException if the OpenCL Buffer cannot be created
 */
OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) :
        context(context), size(size), elementSize(elementSize), name(name), ownsBuffer(true) {
    try {
        // Compute the byte count in size_t: size*elementSize evaluated in int
        // overflows once the array reaches 2 GB.
        const std::size_t numBytes = static_cast<std::size_t>(size)*static_cast<std::size_t>(elementSize);
        buffer = new cl::Buffer(context.getContext(), flags, numBytes);
    }
    catch (const cl::Error& err) {
        // Translate the OpenCL error into the exception type used by the rest of OpenMM.
        std::stringstream str;
        str<<"Error creating array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
        throw OpenMMException(str.str());
    }
}
/**
 * Create an OpenCLArray that wraps a preexisting Buffer. The array is marked
 * as not owning the Buffer.
 *
 * @param context      the context for which to create the array
 * @param buffer       the OpenCL Buffer this object encapsulates
 * @param size         the number of elements in the array
 * @param elementSize  the size of each element in bytes
 * @param name         the name of the array
 */
OpenCLArray::OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name) :
        context(context),
        buffer(buffer),
        size(size),
        elementSize(elementSize),
        name(name),
        ownsBuffer(false) {
    // Nothing to allocate: the Buffer is supplied by the caller.
}
/**
 * Release the device Buffer, but only if this object owns it.
 */
OpenCLArray::~OpenCLArray() {
    if (!ownsBuffer)
        return;
    delete buffer;
}
void OpenCLArray::upload(const void* data, bool blocking) {
try {
context.getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error uploading array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
void OpenCLArray::download(void* data, bool blocking) const {
try {
context.getQueue().enqueueReadBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error downloading array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
/**
 * Copy the values in the Buffer to a second OpenCLArray. The destination must
 * have the same number of elements and the same element size as this array.
 *
 * @param dest  the destination array to copy to
 * @throws OpenMMException if the two arrays do not match in size, or if the
 *                         OpenCL copy fails
 */
void OpenCLArray::copyTo(OpenCLArray& dest) const {
    if (dest.getSize() != size || dest.getElementSize() != elementSize)
        throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array");
    try {
        // Compute the byte count in size_t so large arrays cannot overflow int arithmetic.
        const std::size_t numBytes = static_cast<std::size_t>(size)*static_cast<std::size_t>(elementSize);
        context.getQueue().enqueueCopyBuffer(*buffer, dest.getDeviceBuffer(), 0, 0, numBytes);
    }
    catch (const cl::Error& err) {
        std::stringstream str;
        str<<"Error copying array "<<name<<" to "<<dest.getName()<<": "<<err.what()<<" ("<<err.err()<<")";
        throw OpenMMException(str.str());
    }
}
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -37,62 +37,70 @@ namespace OpenMM {
/**
* This class encapsulates an OpenCL Buffer. It provides a simplified API for working with it,
* an optionally includes a buffer in host memory for copying data to and from the OpenCL Buffer.
* and for copying data to and from the OpenCL Buffer.
*/
template <class T>
class OpenCLArray {
public:
/**
* Create an OpenCLArray object.
* Create an OpenCLArray object. The object is allocated on the heap with the "new" operator.
* The template argument is the data type of each array element.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param name the name of the array
* @param createHostBuffer specifies whether to create a buffer in host memory for copying data to and from
* the OpenCL Buffer
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
OpenCLArray(OpenCLContext& context, int size, const std::string& name, bool createHostBuffer = false, cl_int flags = CL_MEM_READ_WRITE) :
context(context), size(size), name(name), local(createHostBuffer ? size : 0), ownsBuffer(true) {
try {
buffer = new cl::Buffer(context.getContext(), flags, size*sizeof(T));
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error creating array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
template <class T>
static OpenCLArray* create(OpenCLContext& context, int size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
return new OpenCLArray(context, size, sizeof(T), name, flags);
}
/**
* Create an OpenCLArray object the uses a preexisting Buffer.
* Create an OpenCLArray object that uses a preexisting Buffer. The object is allocated on the heap with the "new" operator.
* The template argument is the data type of each array element.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param name the name of the array
* @param createHostBuffer specifies whether to create a buffer in host memory for copying data to and from
* the OpenCL Buffer
*/
OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name, bool createHostBuffer = false) :
context(context), buffer(buffer), size(size), name(name), local(createHostBuffer ? size : 0), ownsBuffer(false) {
}
~OpenCLArray() {
if (ownsBuffer)
delete buffer;
}
const T& operator[](int index) const {
return local[index];
}
T& operator[](int index) {
return local[index];
template <class T>
static OpenCLArray* create(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) {
return new OpenCLArray(context, buffer, size, sizeof(T), name);
}
/**
* Create an OpenCLArray object.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags = CL_MEM_READ_WRITE);
/**
* Create an OpenCLArray object that uses a preexisting Buffer.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name);
~OpenCLArray();
/**
* Get the size of the array.
*/
int getSize() const {
return size;
}
/**
* Get the size of each element in bytes.
*/
int getElementSize() const {
return elementSize;
}
/**
* Get the name of the array.
*/
......@@ -105,85 +113,50 @@ public:
cl::Buffer& getDeviceBuffer() {
return *buffer;
}
/**
* Get a pointer to the host buffer.
*/
T* getHostBuffer() {
return &local[0];
}
/**
* Get an element of the host buffer.
*/
const T& get(int index) const {
return local[index];
}
/**
* Set an element of the host buffer.
*/
void set(int index, const T& value) {
local[index] = value;
}
/**
* Copy the values in a vector to the Buffer.
*/
void upload(std::vector<T>& data, bool blocking = true) {
template <class T>
void upload(const std::vector<T>& data, bool blocking = true) {
if (sizeof(T) != elementSize || data.size() != size)
throw OpenMMException("Error uploading array "+name+": The specified vector does not match the size of the array");
upload(&data[0], blocking);
}
/**
* Copy the values in the Buffer to a vector.
*/
void download(std::vector<T>& data) const {
template <class T>
void download(std::vector<T>& data, bool blocking = true) const {
if (sizeof(T) != elementSize)
throw OpenMMException("Error downloading array "+name+": The specified vector has the wrong element size");
if (data.size() != size)
data.resize(size);
download(&data[0]);
download(&data[0], blocking);
}
/**
* Copy the values in an array to the Buffer.
*
* @param data the data to copy
* @param blocking if true, this call will block until the transfer is complete.
*/
void upload(T* data, bool blocking = true) {
try {
context.getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*sizeof(T), data);
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error uploading array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
void upload(const void* data, bool blocking = true);
/**
* Copy the values in the Buffer to an array.
*
* @param data the array to copy the memory to
* @param blocking if true, this call will block until the transfer is complete.
*/
void download(T* data) const {
try {
context.getQueue().enqueueReadBuffer(*buffer, CL_TRUE, 0, size*sizeof(T), data);
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error downloading array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
/**
* Copy the values in the host buffer to the OpenCL Buffer.
*/
void upload(bool blocking = true) {
if (local.size() == 0)
throw OpenMMException(name+": Called upload() on an OpenCLArray with no host buffer");
upload(local, blocking);
}
void download(void* data, bool blocking = true) const;
/**
* Copy the values in the Buffer to the host buffer.
* Copy the values in the Buffer to a second OpenCLArray.
*
* @param dest the destination array to copy to
*/
void download() {
if (local.size() == 0)
throw OpenMMException(name+": Called download() on an OpenCLArray with no host buffer");
download(local);
}
void copyTo(OpenCLArray& dest) const;
private:
OpenCLContext& context;
cl::Buffer* buffer;
std::vector<T> local;
int size;
int size, elementSize;
bool ownsBuffer;
std::string name;
};
......
......@@ -87,7 +87,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < numAtoms; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][atom];
}
OpenCLArray<cl_uint>* indices = new OpenCLArray<cl_uint>(context, indexVec.size(), "bondedIndices");
OpenCLArray* indices = OpenCLArray::create<cl_uint>(context, indexVec.size(), "bondedIndices");
indices->upload(indexVec);
atomIndices.push_back(indices);
bufferVec[i].resize(width*numBonds, 0);
......@@ -151,7 +151,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int bond = 0; bond < numBonds; bond++)
for (int atom = 0; atom < numAtoms; atom++)
bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]];
OpenCLArray<cl_uint>* buffers = new OpenCLArray<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices");
OpenCLArray* buffers = OpenCLArray::create<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices");
buffers->upload(bufferVec[force]);
bufferIndices[force] = buffers;
}
......
......@@ -134,8 +134,8 @@ private:
std::vector<std::vector<int> > forceSets;
std::vector<cl::Memory*> arguments;
std::vector<std::string> argTypes;
std::vector<OpenCLArray<cl_uint>*> atomIndices;
std::vector<OpenCLArray<cl_uint>*> bufferIndices;
std::vector<OpenCLArray*> atomIndices;
std::vector<OpenCLArray*> bufferIndices;
std::vector<std::string> prefixCode;
int numForceBuffers, maxBonds;
bool hasInitializedKernels;
......
......@@ -30,7 +30,7 @@
using namespace OpenMM;
OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context), dgBlockCounts(NULL) {
dgBlockCounts = new OpenCLArray<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
dgBlockCounts = OpenCLArray::create<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
cl::Program program = context.createProgram(OpenCLKernelSources::compact);
countKernel = cl::Kernel(program, "countElts");
moveValidKernel = cl::Kernel(program, "moveValidElementsStaged");
......@@ -41,7 +41,7 @@ OpenCLCompact::~OpenCLCompact() {
delete dgBlockCounts;
}
void OpenCLCompact::compactStream(OpenCLArray<cl_uint>& dOut, OpenCLArray<cl_uint>& dIn, OpenCLArray<cl_uint>& dValid, OpenCLArray<cl_uint>& numValid) {
void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid) {
// Figure out # elements per block
unsigned int len = dIn.getSize();
unsigned int numBlocks = context.getNumThreadBlocks();
......
......@@ -33,10 +33,10 @@ class OPENMM_EXPORT OpenCLCompact {
public:
OpenCLCompact(OpenCLContext& context);
~OpenCLCompact();
void compactStream(OpenCLArray<cl_uint>& dOut, OpenCLArray<cl_uint>& dIn, OpenCLArray<cl_uint>& dValid, OpenCLArray<cl_uint>& numValid);
void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid);
private:
OpenCLContext& context;
OpenCLArray<cl_uint>* dgBlockCounts;
OpenCLArray* dgBlockCounts;
cl::Kernel countKernel;
cl::Kernel moveValidKernel;
};
......
......@@ -67,7 +67,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL),
velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL),
velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
bonded(NULL), nonbonded(NULL), thread(NULL) {
try {
contextIndex = platformData.contexts.size();
......@@ -217,8 +217,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this);
posq = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "posq", true);
velm = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "velm", true);
posq = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms, "posq");
velm = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms, "velm");
posCellOffsets.resize(paddedNumAtoms, mm_int4(0, 0, 0, 0));
}
catch (cl::Error err) {
......@@ -242,19 +242,20 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
cl::Kernel accuracyKernel(utilities, "determineNativeAccuracy");
OpenCLArray<mm_float8> values(*this, 20, "values", true);
OpenCLArray valuesArray(*this, 20, sizeof(mm_float8), "values");
vector<mm_float8> values(valuesArray.getSize());
float nextValue = 1e-4f;
for (int i = 0; i < values.getSize(); ++i) {
for (int i = 0; i < (int) values.size(); ++i) {
values[i].s0 = nextValue;
nextValue *= (float) M_PI;
}
values.upload();
accuracyKernel.setArg<cl::Buffer>(0, values.getDeviceBuffer());
accuracyKernel.setArg<cl_int>(1, values.getSize());
executeKernel(accuracyKernel, values.getSize());
values.download();
valuesArray.upload(values);
accuracyKernel.setArg<cl::Buffer>(0, valuesArray.getDeviceBuffer());
accuracyKernel.setArg<cl_int>(1, values.size());
executeKernel(accuracyKernel, values.size());
valuesArray.download(values);
double maxSqrtError = 0.0, maxRsqrtError = 0.0, maxRecipError = 0.0, maxExpError = 0.0, maxLogError = 0.0;
for (int i = 0; i < values.getSize(); ++i) {
for (int i = 0; i < (int) values.size(); ++i) {
double v = values[i].s0;
double correctSqrt = sqrt(v);
maxSqrtError = max(maxSqrtError, fabs(correctSqrt-values[i].s1)/correctSqrt);
......@@ -283,6 +284,8 @@ OpenCLContext::~OpenCLContext() {
delete forces[i];
for (int i = 0; i < (int) reorderListeners.size(); i++)
delete reorderListeners[i];
if (pinnedBuffer != NULL)
delete pinnedBuffer;
if (posq != NULL)
delete posq;
if (velm != NULL)
......@@ -295,8 +298,8 @@ OpenCLContext::~OpenCLContext() {
delete longForceBuffer;
if (energyBuffer != NULL)
delete energyBuffer;
if (atomIndex != NULL)
delete atomIndex;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (integration != NULL)
delete integration;
if (bonded != NULL)
......@@ -308,19 +311,20 @@ OpenCLContext::~OpenCLContext() {
}
void OpenCLContext::initialize() {
vector<mm_float4> v(paddedNumAtoms, mm_float4(0, 0, 0, 0));
for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i);
(*velm)[i].w = (float) (mass == 0.0 ? 0.0 : 1.0/mass);
v[i].w = (float) (mass == 0.0 ? 0.0 : 1.0/mass);
}
velm->upload();
velm->upload(v);
bonded->initialize(system);
numForceBuffers = platformData.contexts.size();
numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers());
for (int i = 0; i < (int) forces.size(); i++)
numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers());
forceBuffers = new OpenCLArray<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false);
forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
if (supports64BitGlobalAtomics) {
longForceBuffer = new OpenCLArray<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer", false);
longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
reduceForcesKernel.setArg<cl::Buffer>(0, longForceBuffer->getDeviceBuffer());
reduceForcesKernel.setArg<cl::Buffer>(1, forceBuffers->getDeviceBuffer());
reduceForcesKernel.setArg<cl_int>(2, paddedNumAtoms);
......@@ -328,13 +332,17 @@ void OpenCLContext::initialize() {
addAutoclearBuffer(longForceBuffer->getDeviceBuffer(), longForceBuffer->getSize()*2);
}
addAutoclearBuffer(forceBuffers->getDeviceBuffer(), forceBuffers->getSize()*4);
force = new OpenCLArray<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true);
energyBuffer = new OpenCLArray<cl_float>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer", true);
force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = OpenCLArray::create<cl_float>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer");
addAutoclearBuffer(energyBuffer->getDeviceBuffer(), energyBuffer->getSize());
atomIndex = new OpenCLArray<cl_int>(*this, paddedNumAtoms, "atomIndex", true);
int bufferBytes = max(posq->getSize()*sizeof(mm_float4), energyBuffer->getSize()*sizeof(cl_float));
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = queue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
atomIndexDevice = OpenCLArray::create<cl_int>(*this, paddedNumAtoms, "atomIndexDevice");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i)
(*atomIndex)[i] = i;
atomIndex->upload();
atomIndex[i] = i;
atomIndexDevice->upload(atomIndex);
findMoleculeGroups();
moleculesInvalid = false;
nonbonded->initialize(system);
......@@ -410,12 +418,8 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
}
}
void OpenCLContext::clearBuffer(OpenCLArray<float>& array) {
clearBuffer(array.getDeviceBuffer(), array.getSize());
}
void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
clearBuffer(array.getDeviceBuffer(), array.getSize()*4);
void OpenCLContext::clearBuffer(OpenCLArray& array) {
clearBuffer(array.getDeviceBuffer(), array.getSize()*array.getElementSize()/sizeof(cl_float));
}
void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
......@@ -500,7 +504,7 @@ void OpenCLContext::reduceForces() {
reduceBuffer(*forceBuffers, numForceBuffers);
}
void OpenCLContext::reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers) {
void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
int bufferSize = array.getSize()/numBuffers;
reduceFloat4Kernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
reduceFloat4Kernel.setArg<cl_int>(1, bufferSize);
......@@ -760,26 +764,28 @@ void OpenCLContext::validateMolecules() {
// atoms to their original order, rebuild the list of identical molecules, and sort them
// again.
vector<mm_float4> newPosq(numAtoms);
vector<mm_float4> newVelm(numAtoms);
vector<mm_float4> oldPosq(paddedNumAtoms);
vector<mm_float4> newPosq(paddedNumAtoms);
vector<mm_float4> oldVelm(paddedNumAtoms);
vector<mm_float4> newVelm(paddedNumAtoms);
vector<mm_int4> newCellOffsets(numAtoms);
posq->download();
velm->download();
posq->download(oldPosq);
velm->download(oldVelm);
for (int i = 0; i < numAtoms; i++) {
int index = atomIndex->get(i);
newPosq[index] = posq->get(i);
newVelm[index] = velm->get(i);
int index = atomIndex[i];
newPosq[index] = oldPosq[i];
newVelm[index] = oldVelm[i];
newCellOffsets[index] = posCellOffsets[i];
}
posq->upload(newPosq);
velm->upload(newVelm);
for (int i = 0; i < numAtoms; i++) {
posq->set(i, newPosq[i]);
velm->set(i, newVelm[i]);
atomIndex->set(i, i);
atomIndex[i] = i;
posCellOffsets[i] = newCellOffsets[i];
}
posq->upload();
velm->upload();
atomIndex->upload();
posq->upload(newPosq);
velm->upload(newVelm);
atomIndexDevice->upload(atomIndex);
findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute();
......@@ -794,11 +800,13 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
// Find the range of positions and the number of bins along each axis.
posq->download();
velm->download();
float minx = posq->get(0).x, maxx = posq->get(0).x;
float miny = posq->get(0).y, maxy = posq->get(0).y;
float minz = posq->get(0).z, maxz = posq->get(0).z;
vector<mm_float4> oldPosq(paddedNumAtoms);
vector<mm_float4> oldVelm(paddedNumAtoms);
posq->download(oldPosq);
velm->download(oldVelm);
float minx = oldPosq[0].x, maxx = oldPosq[0].x;
float miny = oldPosq[0].y, maxy = oldPosq[0].y;
float minz = oldPosq[0].z, maxz = oldPosq[0].z;
if (nonbonded->getUsePeriodic()) {
minx = miny = minz = 0.0;
maxx = periodicBoxSize.x;
......@@ -807,7 +815,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
}
else {
for (int i = 1; i < numAtoms; i++) {
const mm_float4& pos = posq->get(i);
const mm_float4& pos = oldPosq[i];
minx = min(minx, pos.x);
maxx = max(maxx, pos.x);
miny = min(miny, pos.y);
......@@ -820,8 +828,8 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
// Loop over each group of identical molecules and reorder them.
vector<int> originalIndex(numAtoms);
vector<mm_float4> newPosq(numAtoms);
vector<mm_float4> newVelm(numAtoms);
vector<mm_float4> newPosq(paddedNumAtoms);
vector<mm_float4> newVelm(paddedNumAtoms);
vector<mm_int4> newCellOffsets(numAtoms);
for (int group = 0; group < (int) moleculeGroups.size(); group++) {
// Find the center of each molecule.
......@@ -837,7 +845,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
molPos[i].z = 0.0f;
for (int j = 0; j < (int)atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
const mm_float4& pos = posq->get(atom);
const mm_float4& pos = oldPosq[atom];
molPos[i].x += pos.x;
molPos[i].y += pos.y;
molPos[i].z += pos.z;
......@@ -863,11 +871,11 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
if (enforcePeriodic) {
for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i];
mm_float4 p = posq->get(atom);
mm_float4 p = oldPosq[atom];
p.x -= dx;
p.y -= dy;
p.z -= dz;
posq->set(atom, p);
oldPosq[atom] = p;
posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].z -= zcell;
......@@ -918,9 +926,9 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
for (int j = 0; j < (int)atoms.size(); j++) {
int oldIndex = mol.offsets[molBins[i].second]+atoms[j];
int newIndex = mol.offsets[i]+atoms[j];
originalIndex[newIndex] = atomIndex->get(oldIndex);
newPosq[newIndex] = posq->get(oldIndex);
newVelm[newIndex] = velm->get(oldIndex);
originalIndex[newIndex] = atomIndex[oldIndex];
newPosq[newIndex] = oldPosq[oldIndex];
newVelm[newIndex] = oldVelm[oldIndex];
newCellOffsets[newIndex] = posCellOffsets[oldIndex];
}
}
......@@ -929,14 +937,12 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
// Update the streams.
for (int i = 0; i < numAtoms; i++) {
posq->set(i, newPosq[i]);
velm->set(i, newVelm[i]);
atomIndex->set(i, originalIndex[i]);
atomIndex[i] = originalIndex[i];
posCellOffsets[i] = newCellOffsets[i];
}
posq->upload();
velm->upload();
atomIndex->upload();
posq->upload(newPosq);
velm->upload(newVelm);
atomIndexDevice->upload(atomIndex);
for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute();
}
......
......@@ -42,7 +42,6 @@
namespace OpenMM {
template <class T>
class OpenCLArray;
class OpenCLForceInfo;
class OpenCLIntegrationUtilities;
......@@ -196,44 +195,57 @@ public:
/**
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
OpenCLArray<mm_float4>& getPosq() {
OpenCLArray& getPosq() {
return *posq;
}
/**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/
OpenCLArray<mm_float4>& getVelm() {
OpenCLArray& getVelm() {
return *velm;
}
/**
* Get the array which contains the force on each atom.
*/
OpenCLArray<mm_float4>& getForce() {
OpenCLArray& getForce() {
return *force;
}
/**
* Get the array which contains the buffers in which forces are computed.
*/
OpenCLArray<mm_float4>& getForceBuffers() {
OpenCLArray& getForceBuffers() {
return *forceBuffers;
}
/**
* Get the array which contains a contribution to each force represented as 64 bit fixed point.
*/
OpenCLArray<cl_long>& getLongForceBuffer() {
OpenCLArray& getLongForceBuffer() {
return *longForceBuffer;
}
/**
* Get the array which contains the buffer in which energy is computed.
*/
OpenCLArray<cl_float>& getEnergyBuffer() {
OpenCLArray& getEnergyBuffer() {
return *energyBuffer;
}
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
*/
void* getPinnedBuffer() {
return pinnedMemory;
}
/**
* Get the host-side vector which contains the index of each atom.
*/
const std::vector<int>& getAtomIndex() const {
return atomIndex;
}
/**
* Get the array which contains the index of each atom.
*/
OpenCLArray<cl_int>& getAtomIndex() {
return *atomIndex;
OpenCLArray& getAtomIndexArray() {
return *atomIndexDevice;
}
/**
* Get the number of cells by which the positions are offset.
......@@ -277,11 +289,7 @@ public:
/**
* Set all elements of an array to 0.
*/
void clearBuffer(OpenCLArray<float>& array);
/**
* Set all elements of an array to 0.
*/
void clearBuffer(OpenCLArray<mm_float4>& array);
void clearBuffer(OpenCLArray& array);
/**
* Set all elements of an array to 0.
*
......@@ -307,7 +315,7 @@ public:
* @param array the array containing the buffers to reduce
* @param numBuffers the number of buffers packed into the array
*/
void reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers);
void reduceBuffer(OpenCLArray& array, int numBuffers);
/**
* Sum the buffers containing forces.
*/
......@@ -527,13 +535,16 @@ private:
std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups;
std::vector<mm_int4> posCellOffsets;
OpenCLArray<mm_float4>* posq;
OpenCLArray<mm_float4>* velm;
OpenCLArray<mm_float4>* force;
OpenCLArray<mm_float4>* forceBuffers;
OpenCLArray<cl_long>* longForceBuffer;
OpenCLArray<cl_float>* energyBuffer;
OpenCLArray<cl_int>* atomIndex;
cl::Buffer* pinnedBuffer;
void* pinnedMemory;
OpenCLArray* posq;
OpenCLArray* velm;
OpenCLArray* force;
OpenCLArray* forceBuffers;
OpenCLArray* longForceBuffer;
OpenCLArray* energyBuffer;
OpenCLArray* atomIndexDevice;
std::vector<int> atomIndex;
std::vector<cl::Memory*> autoclearBuffers;
std::vector<int> autoclearBufferSizes;
std::vector<ReorderListener*> reorderListeners;
......
......@@ -41,7 +41,7 @@ OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize
ykernel = createKernel(zsize, xsize, ysize);
}
void OpenCLFFT3D::execFFT(OpenCLArray<mm_float2>& in, OpenCLArray<mm_float2>& out, bool forward) {
void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
int maxSize = xkernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice());
if (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU)
maxSize = 1;
......
......@@ -72,7 +72,7 @@ public:
* @param out on exit, this contains the transformed data
* @param forward true to perform a forward transform, false to perform an inverse transform
*/
void execFFT(OpenCLArray<mm_float2>& in, OpenCLArray<mm_float2>& out, bool forward = true);
void execFFT(OpenCLArray& in, OpenCLArray& out, bool forward = true);
/**
* Get the smallest legal size for a dimension of the grid (that is, a size with no prime
* factors other than 2, 3, and 5).
......
......@@ -96,12 +96,12 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false) {
// Create workspace arrays.
posDelta = new OpenCLArray<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
posDelta = OpenCLArray::create<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<mm_float4> deltas(posDelta->getSize(), mm_float4(0.0, 0.0, 0.0, 0.0));
posDelta->upload(deltas);
stepSize = new OpenCLArray<mm_float2>(context, 1, "stepSize", true);
stepSize->set(0, mm_float2(0.0f, 0.0f));
stepSize->upload();
stepSize = OpenCLArray::create<mm_float2>(context, 1, "stepSize");
vector<mm_float2> step(1, mm_float2(0.0f, 0.0f));
stepSize->upload(step);
// Create kernels for enforcing constraints.
......@@ -192,8 +192,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
isShakeAtom[atom2] = true;
isShakeAtom[atom3] = true;
}
settleAtoms = new OpenCLArray<mm_int4>(context, atoms.size(), "settleAtoms");
settleParams = new OpenCLArray<mm_float2>(context, params.size(), "settleParams");
settleAtoms = OpenCLArray::create<mm_int4>(context, atoms.size(), "settleAtoms");
settleParams = OpenCLArray::create<mm_float2>(context, params.size(), "settleParams");
settleAtoms->upload(atoms);
settleParams->upload(params);
}
......@@ -274,8 +274,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
isShakeAtom[cluster.peripheralID[2]] = true;
++index;
}
shakeAtoms = new OpenCLArray<mm_int4>(context, atoms.size(), "shakeAtoms");
shakeParams = new OpenCLArray<mm_float4>(context, params.size(), "shakeParams");
shakeAtoms = OpenCLArray::create<mm_int4>(context, atoms.size(), "shakeAtoms");
shakeParams = OpenCLArray::create<mm_float4>(context, params.size(), "shakeParams");
shakeAtoms->upload(atoms);
shakeParams->upload(params);
}
......@@ -457,18 +457,18 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Record the CCMA data structures.
ccmaAtoms = new OpenCLArray<mm_int2>(context, numCCMA, "CcmaAtoms");
ccmaDistance = new OpenCLArray<mm_float4>(context, numCCMA, "CcmaDistance");
ccmaAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaDelta1 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta2");
ccmaConverged = new OpenCLArray<cl_int>(context, 2, "CcmaConverged");
ccmaAtoms = OpenCLArray::create<mm_int2>(context, numCCMA, "CcmaAtoms");
ccmaDistance = OpenCLArray::create<mm_float4>(context, numCCMA, "CcmaDistance");
ccmaAtomConstraints = OpenCLArray::create<cl_int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints = OpenCLArray::create<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaDelta1 = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaDelta2");
ccmaConverged = OpenCLArray::create<cl_int>(context, 2, "CcmaConverged");
ccmaConvergedBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, 2*sizeof(cl_int));
ccmaConvergedMemory = (cl_int*) context.getQueue().enqueueMapBuffer(*ccmaConvergedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, 2*sizeof(cl_int));
ccmaReducedMass = new OpenCLArray<cl_float>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixColumn = new OpenCLArray<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConstraintMatrixValue = new OpenCLArray<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
ccmaReducedMass = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixColumn = OpenCLArray::create<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConstraintMatrixValue = OpenCLArray::create<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
vector<mm_int2> atomsVec(ccmaAtoms->getSize());
vector<mm_float4> distanceVec(ccmaDistance->getSize());
vector<cl_int> atomConstraintsVec(ccmaAtomConstraints->getSize());
......@@ -556,12 +556,12 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
int num2Avg = vsite2AvgAtomVec.size();
int num3Avg = vsite3AvgAtomVec.size();
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
vsite2AvgAtoms = new OpenCLArray<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite2AvgWeights = new OpenCLArray<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgAtoms = new OpenCLArray<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsite3AvgWeights = new OpenCLArray<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneAtoms = new OpenCLArray<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteOutOfPlaneWeights = new OpenCLArray<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
vsite2AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite2AvgWeights = OpenCLArray::create<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsite3AvgWeights = OpenCLArray::create<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneAtoms = OpenCLArray::create<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteOutOfPlaneWeights = OpenCLArray::create<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
if (num2Avg > 0) {
vsite2AvgAtoms->upload(vsite2AvgAtomVec);
vsite2AvgWeights->upload(vsite2AvgWeightVec);
......@@ -779,8 +779,8 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu
// Create the random number arrays.
lastSeed = randomNumberSeed;
random = new OpenCLArray<mm_float4>(context, 32*context.getPaddedNumAtoms(), "random");
randomSeed = new OpenCLArray<mm_int4>(context, context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
random = OpenCLArray::create<mm_float4>(context, 32*context.getPaddedNumAtoms(), "random");
randomSeed = OpenCLArray::create<mm_int4>(context, context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
randomPos = random->getSize();
// Use a quick and dirty RNG to pick seeds for the real random number generator.
......@@ -809,7 +809,7 @@ int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) {
}
if (numValues > random->getSize()) {
delete random;
random = new OpenCLArray<mm_float4>(context, numValues, "random");
random = OpenCLArray::create<mm_float4>(context, numValues, "random");
}
randomKernel.setArg<cl_int>(0, random->getSize());
randomKernel.setArg<cl::Buffer>(1, random->getDeviceBuffer());
......
......@@ -46,20 +46,20 @@ public:
/**
* Get the array which contains position deltas.
*/
OpenCLArray<mm_float4>& getPosDelta() {
OpenCLArray& getPosDelta() {
return *posDelta;
}
/**
* Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1.
*/
OpenCLArray<mm_float4>& getRandom() {
OpenCLArray& getRandom() {
return *random;
}
/**
* Get the array which contains the current step size.
*/
OpenCLArray<mm_float2>& getStepSize() {
OpenCLArray& getStepSize() {
return *stepSize;
}
/**
......@@ -116,32 +116,32 @@ private:
cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel;
cl::Kernel vsitePositionKernel, vsiteForceKernel;
cl::Kernel randomKernel;
OpenCLArray<mm_float4>* posDelta;
OpenCLArray<mm_int4>* settleAtoms;
OpenCLArray<mm_float2>* settleParams;
OpenCLArray<mm_int4>* shakeAtoms;
OpenCLArray<mm_float4>* shakeParams;
OpenCLArray<mm_float4>* random;
OpenCLArray<mm_int4>* randomSeed;
OpenCLArray<mm_float2>* stepSize;
OpenCLArray<mm_int2>* ccmaAtoms;
OpenCLArray<mm_float4>* ccmaDistance;
OpenCLArray<cl_float>* ccmaReducedMass;
OpenCLArray<cl_int>* ccmaAtomConstraints;
OpenCLArray<cl_int>* ccmaNumAtomConstraints;
OpenCLArray<cl_int>* ccmaConstraintMatrixColumn;
OpenCLArray<cl_float>* ccmaConstraintMatrixValue;
OpenCLArray<cl_float>* ccmaDelta1;
OpenCLArray<cl_float>* ccmaDelta2;
OpenCLArray<cl_int>* ccmaConverged;
OpenCLArray* posDelta;
OpenCLArray* settleAtoms;
OpenCLArray* settleParams;
OpenCLArray* shakeAtoms;
OpenCLArray* shakeParams;
OpenCLArray* random;
OpenCLArray* randomSeed;
OpenCLArray* stepSize;
OpenCLArray* ccmaAtoms;
OpenCLArray* ccmaDistance;
OpenCLArray* ccmaReducedMass;
OpenCLArray* ccmaAtomConstraints;
OpenCLArray* ccmaNumAtomConstraints;
OpenCLArray* ccmaConstraintMatrixColumn;
OpenCLArray* ccmaConstraintMatrixValue;
OpenCLArray* ccmaDelta1;
OpenCLArray* ccmaDelta2;
OpenCLArray* ccmaConverged;
cl::Buffer* ccmaConvergedBuffer;
cl_int* ccmaConvergedMemory;
OpenCLArray<mm_int4>* vsite2AvgAtoms;
OpenCLArray<mm_float2>* vsite2AvgWeights;
OpenCLArray<mm_int4>* vsite3AvgAtoms;
OpenCLArray<mm_float4>* vsite3AvgWeights;
OpenCLArray<mm_int4>* vsiteOutOfPlaneAtoms;
OpenCLArray<mm_float4>* vsiteOutOfPlaneWeights;
OpenCLArray* vsite2AvgAtoms;
OpenCLArray* vsite2AvgWeights;
OpenCLArray* vsite3AvgAtoms;
OpenCLArray* vsite3AvgWeights;
OpenCLArray* vsiteOutOfPlaneAtoms;
OpenCLArray* vsiteOutOfPlaneWeights;
int randomPos;
int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels;
......
This diff is collapsed.
......@@ -255,7 +255,7 @@ private:
bool hasInitializedKernel;
OpenCLContext& cl;
System& system;
OpenCLArray<mm_float2>* params;
OpenCLArray* params;
};
/**
......@@ -296,7 +296,7 @@ private:
OpenCLContext& cl;
System& system;
OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals;
OpenCLArray* globals;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
};
......@@ -338,7 +338,7 @@ private:
bool hasInitializedKernel;
OpenCLContext& cl;
System& system;
OpenCLArray<mm_float2>* params;
OpenCLArray* params;
};
/**
......@@ -379,7 +379,7 @@ private:
OpenCLContext& cl;
System& system;
OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals;
OpenCLArray* globals;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
};
......@@ -421,7 +421,7 @@ private:
bool hasInitializedKernel;
OpenCLContext& cl;
System& system;
OpenCLArray<mm_float4>* params;
OpenCLArray* params;
};
/**
......@@ -461,7 +461,7 @@ private:
bool hasInitializedKernel;
OpenCLContext& cl;
System& system;
OpenCLArray<mm_float8>* params;
OpenCLArray* params;
};
/**
......@@ -494,9 +494,9 @@ private:
bool hasInitializedKernel;
OpenCLContext& cl;
System& system;
OpenCLArray<mm_float4>* coefficients;
OpenCLArray<mm_int2>* mapPositions;
OpenCLArray<cl_int>* torsionMaps;
OpenCLArray* coefficients;
OpenCLArray* mapPositions;
OpenCLArray* torsionMaps;
};
/**
......@@ -537,7 +537,7 @@ private:
OpenCLContext& cl;
System& system;
OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals;
OpenCLArray* globals;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
};
......@@ -591,18 +591,18 @@ private:
};
OpenCLContext& cl;
bool hasInitializedKernel;
OpenCLArray<mm_float2>* sigmaEpsilon;
OpenCLArray<mm_float4>* exceptionParams;
OpenCLArray<mm_float2>* cosSinSums;
OpenCLArray<mm_float2>* pmeGrid;
OpenCLArray<mm_float2>* pmeGrid2;
OpenCLArray<cl_float>* pmeBsplineModuliX;
OpenCLArray<cl_float>* pmeBsplineModuliY;
OpenCLArray<cl_float>* pmeBsplineModuliZ;
OpenCLArray<mm_float4>* pmeBsplineTheta;
OpenCLArray<mm_float4>* pmeBsplineDTheta;
OpenCLArray<cl_int>* pmeAtomRange;
OpenCLArray<mm_int2>* pmeAtomGridIndex;
OpenCLArray* sigmaEpsilon;
OpenCLArray* exceptionParams;
OpenCLArray* cosSinSums;
OpenCLArray* pmeGrid;
OpenCLArray* pmeGrid2;
OpenCLArray* pmeBsplineModuliX;
OpenCLArray* pmeBsplineModuliY;
OpenCLArray* pmeBsplineModuliZ;
OpenCLArray* pmeBsplineTheta;
OpenCLArray* pmeBsplineDTheta;
OpenCLArray* pmeAtomRange;
OpenCLArray* pmeAtomGridIndex;
OpenCLSort<SortTrait>* sort;
OpenCLFFT3D* fft;
cl::Kernel ewaldSumsKernel;
......@@ -658,11 +658,11 @@ public:
private:
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals;
OpenCLArray<mm_float4>* tabulatedFunctionParams;
OpenCLArray* globals;
OpenCLArray* tabulatedFunctionParams;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions;
std::vector<OpenCLArray*> tabulatedFunctions;
System& system;
};
......@@ -704,13 +704,13 @@ private:
bool hasCreatedKernels;
int maxTiles;
OpenCLContext& cl;
OpenCLArray<mm_float2>* params;
OpenCLArray<cl_float>* bornSum;
OpenCLArray<cl_long>* longBornSum;
OpenCLArray<cl_float>* bornRadii;
OpenCLArray<cl_float>* bornForce;
OpenCLArray<cl_long>* longBornForce;
OpenCLArray<cl_float>* obcChain;
OpenCLArray* params;
OpenCLArray* bornSum;
OpenCLArray* longBornSum;
OpenCLArray* bornRadii;
OpenCLArray* bornForce;
OpenCLArray* longBornForce;
OpenCLArray* obcChain;
cl::Kernel computeBornSumKernel;
cl::Kernel reduceBornSumKernel;
cl::Kernel force1Kernel;
......@@ -757,14 +757,14 @@ private:
OpenCLParameterSet* params;
OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs;
OpenCLArray<cl_long>* longEnergyDerivs;
OpenCLArray<cl_float>* globals;
OpenCLArray<cl_float>* valueBuffers;
OpenCLArray<cl_long>* longValueBuffers;
OpenCLArray<mm_float4>* tabulatedFunctionParams;
OpenCLArray* longEnergyDerivs;
OpenCLArray* globals;
OpenCLArray* valueBuffers;
OpenCLArray* longValueBuffers;
OpenCLArray* tabulatedFunctionParams;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions;
std::vector<OpenCLArray*> tabulatedFunctions;
std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue;
System& system;
cl::Kernel pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
......@@ -808,7 +808,7 @@ private:
OpenCLContext& cl;
System& system;
OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals;
OpenCLArray* globals;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
};
......@@ -853,17 +853,17 @@ private:
OpenCLContext& cl;
OpenCLParameterSet* donorParams;
OpenCLParameterSet* acceptorParams;
OpenCLArray<cl_float>* globals;
OpenCLArray<mm_int4>* donors;
OpenCLArray<mm_int4>* acceptors;
OpenCLArray<mm_int4>* donorBufferIndices;
OpenCLArray<mm_int4>* acceptorBufferIndices;
OpenCLArray<mm_int4>* donorExclusions;
OpenCLArray<mm_int4>* acceptorExclusions;
OpenCLArray<mm_float4>* tabulatedFunctionParams;
OpenCLArray* globals;
OpenCLArray* donors;
OpenCLArray* acceptors;
OpenCLArray* donorBufferIndices;
OpenCLArray* acceptorBufferIndices;
OpenCLArray* donorExclusions;
OpenCLArray* acceptorExclusions;
OpenCLArray* tabulatedFunctionParams;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions;
std::vector<OpenCLArray*> tabulatedFunctions;
System& system;
cl::Kernel donorKernel, acceptorKernel;
};
......@@ -905,11 +905,11 @@ private:
int numBonds;
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals;
OpenCLArray<mm_float4>* tabulatedFunctionParams;
OpenCLArray* globals;
OpenCLArray* tabulatedFunctionParams;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions;
std::vector<OpenCLArray*> tabulatedFunctions;
System& system;
};
......@@ -970,7 +970,7 @@ private:
OpenCLContext& cl;
double prevTemp, prevFriction, prevStepSize;
bool hasInitializedKernels;
OpenCLArray<cl_float>* params;
OpenCLArray* params;
cl::Kernel kernel1, kernel2;
};
......@@ -1065,7 +1065,7 @@ private:
OpenCLContext& cl;
bool hasInitializedKernels;
int blockSize;
OpenCLArray<cl_float>* params;
OpenCLArray* params;
cl::Kernel kernel1, kernel2, selectSizeKernel;
double prevTemp, prevFriction, prevErrorTol;
};
......@@ -1138,14 +1138,15 @@ private:
int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters;
mutable bool localValuesAreCurrent;
OpenCLArray<cl_float>* globalValues;
OpenCLArray<cl_float>* contextParameterValues;
OpenCLArray<cl_float>* sumBuffer;
OpenCLArray<cl_float>* energy;
OpenCLArray<mm_float4>* uniformRandoms;
OpenCLArray<mm_int4>* randomSeed;
OpenCLArray* globalValues;
OpenCLArray* contextParameterValues;
OpenCLArray* sumBuffer;
OpenCLArray* energy;
OpenCLArray* uniformRandoms;
OpenCLArray* randomSeed;
OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValues;
std::vector<float> contextValues;
std::vector<std::vector<cl::Kernel> > kernels;
cl::Kernel sumEnergyKernel, randomKernel;
std::vector<CustomIntegrator::ComputationType> stepType;
......@@ -1185,7 +1186,7 @@ private:
OpenCLContext& cl;
bool hasInitializedKernels;
int randomSeed;
OpenCLArray<cl_int>* atomGroups;
OpenCLArray* atomGroups;
cl::Kernel kernel;
};
......@@ -1226,9 +1227,9 @@ private:
OpenCLContext& cl;
bool hasInitializedKernels;
int numMolecules;
OpenCLArray<mm_float4>* savedPositions;
OpenCLArray<cl_int>* moleculeAtoms;
OpenCLArray<cl_int>* moleculeStartIndex;
OpenCLArray* savedPositions;
OpenCLArray* moleculeAtoms;
OpenCLArray* moleculeStartIndex;
cl::Kernel kernel;
};
......@@ -1280,7 +1281,7 @@ public:
private:
OpenCLContext& cl;
int frequency;
OpenCLArray<mm_float4>* cmMomentum;
OpenCLArray* cmMomentum;
cl::Kernel kernel1, kernel2;
};
......
......@@ -191,14 +191,14 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
exclusionIndicesVec.push_back(iter->second);
}
exclusionRowIndicesVec[++currentRow] = exclusionIndicesVec.size();
exclusionIndices = new OpenCLArray<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices");
exclusionRowIndices = new OpenCLArray<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices");
exclusionIndices = OpenCLArray::create<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices");
exclusionRowIndices = OpenCLArray::create<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices");
exclusionIndices->upload(exclusionIndicesVec);
exclusionRowIndices->upload(exclusionRowIndicesVec);
// Record the exclusion data.
exclusions = new OpenCLArray<cl_uint>(context, tilesWithExclusions.size()*OpenCLContext::TileSize, "exclusions");
exclusions = OpenCLArray::create<cl_uint>(context, tilesWithExclusions.size()*OpenCLContext::TileSize, "exclusions");
vector<cl_uint> exclusionVec(exclusions->getSize());
for (int i = 0; i < exclusions->getSize(); ++i)
exclusionVec[i] = 0xFFFFFFFF;
......@@ -253,13 +253,13 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
maxInteractingTiles = numTiles;
if (maxInteractingTiles < 1)
maxInteractingTiles = 1;
interactingTiles = new OpenCLArray<mm_ushort2>(context, maxInteractingTiles, "interactingTiles");
interactionFlags = new OpenCLArray<cl_uint>(context, context.getSIMDWidth() == 32 ? maxInteractingTiles : (deviceIsCpu ? 2*maxInteractingTiles : 1), "interactionFlags");
interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount", true);
blockCenter = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockBoundingBox");
interactionCount->set(0, 0);
interactionCount->upload();
interactingTiles = OpenCLArray::create<mm_ushort2>(context, maxInteractingTiles, "interactingTiles");
interactionFlags = OpenCLArray::create<cl_uint>(context, context.getSIMDWidth() == 32 ? maxInteractingTiles : (deviceIsCpu ? 2*maxInteractingTiles : 1), "interactionFlags");
interactionCount = OpenCLArray::create<cl_uint>(context, 1, "interactionCount");
blockCenter = OpenCLArray::create<mm_float4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = OpenCLArray::create<mm_float4>(context, numAtomBlocks, "blockBoundingBox");
vector<cl_uint> count(1, 0);
interactionCount->upload(count);
}
// Create kernels.
......@@ -353,26 +353,27 @@ void OpenCLNonbondedUtilities::computeInteractions() {
void OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return;
interactionCount->download();
if (interactionCount->get(0) <= (unsigned int) interactingTiles->getSize())
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer();
interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) interactingTiles->getSize())
return;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
int newSize = (int) (1.2*interactionCount->get(0));
int newSize = (int) (1.2*pinnedInteractionCount[0]);
int numTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (newSize > numTiles)
newSize = numTiles;
delete interactingTiles;
interactingTiles = new OpenCLArray<mm_ushort2>(context, newSize, "interactingTiles");
interactingTiles = OpenCLArray::create<mm_ushort2>(context, newSize, "interactingTiles");
forceKernel.setArg<cl::Buffer>(8, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(12, newSize);
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(9, newSize);
if (context.getSIMDWidth() == 32 || deviceIsCpu) {
delete interactionFlags;
interactionFlags = new OpenCLArray<cl_uint>(context, deviceIsCpu ? 2*newSize : newSize, "interactionFlags");
interactionFlags = OpenCLArray::create<cl_uint>(context, deviceIsCpu ? 2*newSize : newSize, "interactionFlags");
forceKernel.setArg<cl::Buffer>(13, interactionFlags->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactionFlags->getDeviceBuffer());
if (!deviceIsCpu) {
......
......@@ -170,49 +170,49 @@ public:
/**
* Get the array containing the center of each atom block.
*/
OpenCLArray<mm_float4>& getBlockCenters() {
OpenCLArray& getBlockCenters() {
return *blockCenter;
}
/**
* Get the array containing the dimensions of each atom block.
*/
OpenCLArray<mm_float4>& getBlockBoundingBoxes() {
OpenCLArray& getBlockBoundingBoxes() {
return *blockBoundingBox;
}
/**
* Get the array whose first element contains the number of tiles with interactions.
*/
OpenCLArray<cl_uint>& getInteractionCount() {
OpenCLArray& getInteractionCount() {
return *interactionCount;
}
/**
* Get the array containing tiles with interactions.
*/
OpenCLArray<mm_ushort2>& getInteractingTiles() {
OpenCLArray& getInteractingTiles() {
return *interactingTiles;
}
/**
* Get the array containing flags for tiles with interactions.
*/
OpenCLArray<cl_uint>& getInteractionFlags() {
OpenCLArray& getInteractionFlags() {
return *interactionFlags;
}
/**
* Get the array containing exclusion flags.
*/
OpenCLArray<cl_uint>& getExclusions() {
OpenCLArray& getExclusions() {
return *exclusions;
}
/**
* Get the array containing the index into the exclusion array for each tile.
*/
OpenCLArray<cl_uint>& getExclusionIndices() {
OpenCLArray& getExclusionIndices() {
return *exclusionIndices;
}
/**
* Get the array listing where the exclusion data starts for each row.
*/
OpenCLArray<cl_uint>& getExclusionRowIndices() {
OpenCLArray& getExclusionRowIndices() {
return *exclusionRowIndices;
}
/**
......@@ -250,14 +250,14 @@ private:
cl::Kernel findBlockBoundsKernel;
cl::Kernel findInteractingBlocksKernel;
cl::Kernel findInteractionsWithinBlocksKernel;
OpenCLArray<cl_uint>* exclusions;
OpenCLArray<cl_uint>* exclusionIndices;
OpenCLArray<cl_uint>* exclusionRowIndices;
OpenCLArray<mm_ushort2>* interactingTiles;
OpenCLArray<cl_uint>* interactionFlags;
OpenCLArray<cl_uint>* interactionCount;
OpenCLArray<mm_float4>* blockCenter;
OpenCLArray<mm_float4>* blockBoundingBox;
OpenCLArray* exclusions;
OpenCLArray* exclusionIndices;
OpenCLArray* exclusionRowIndices;
OpenCLArray* interactingTiles;
OpenCLArray* interactionFlags;
OpenCLArray* interactionCount;
OpenCLArray* blockCenter;
OpenCLArray* blockBoundingBox;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments;
......
......@@ -130,8 +130,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
OpenCLContext& cl0 = *data.contexts[0];
if (contextForces == NULL) {
contextForces = new OpenCLArray<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces", true);
contextForces = OpenCLArray::create<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces");
int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*sizeof(mm_float4);
pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedPositionMemory = (mm_float4*) cl0.getQueue().enqueueMapBuffer(*pinnedPositionBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment