"wrappers/python/vscode:/vscode.git/clone" did not exist on "bf3def120a36b03d5d3feea188f45013020c8dde"
Commit 1107aa83 authored by Peter Eastman's avatar Peter Eastman
Browse files

OpenCLArray is no longer templatized and doesn't provide a host buffer. This...

OpenCLArray is no longer templatized and doesn't provide a host buffer.  This is in preparation for adding mixed/double precision support.
parent 5980100d
...@@ -45,7 +45,7 @@ using namespace std; ...@@ -45,7 +45,7 @@ using namespace std;
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), cutoff(-1.0), useCutoff(false), anyExclusions(false), CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), cutoff(-1.0), useCutoff(false), anyExclusions(false),
exclusionIndices(NULL), exclusionRowIndices(NULL), exclusions(NULL), interactingTiles(NULL), interactionFlags(NULL), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusions(NULL), interactingTiles(NULL), interactionFlags(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), pinnedInteractionCount(NULL), nonbondedForceGroup(0) { interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), nonbondedForceGroup(0) {
// Decide how many thread blocks to use. // Decide how many thread blocks to use.
string errorMessage = "Error initializing nonbonded utilities"; string errorMessage = "Error initializing nonbonded utilities";
...@@ -72,8 +72,6 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() { ...@@ -72,8 +72,6 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete blockCenter; delete blockCenter;
if (blockBoundingBox != NULL) if (blockBoundingBox != NULL)
delete blockBoundingBox; delete blockBoundingBox;
if (pinnedInteractionCount != NULL)
cuMemFreeHost(pinnedInteractionCount);
} }
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) { void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
...@@ -240,9 +238,8 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -240,9 +238,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
blockCenter = CudaArray::create<float4>(context, numAtomBlocks, "blockCenter"); blockCenter = CudaArray::create<float4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = CudaArray::create<float4>(context, numAtomBlocks, "blockBoundingBox"); blockBoundingBox = CudaArray::create<float4>(context, numAtomBlocks, "blockBoundingBox");
} }
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedInteractionCount, sizeof(unsigned int), 0)); vector<unsigned int> count(1, 0);
pinnedInteractionCount[0] = 0; interactionCount->upload(count);
interactionCount->upload(pinnedInteractionCount);
} }
// Create kernels. // Create kernels.
...@@ -325,6 +322,7 @@ void CudaNonbondedUtilities::computeInteractions() { ...@@ -325,6 +322,7 @@ void CudaNonbondedUtilities::computeInteractions() {
void CudaNonbondedUtilities::updateNeighborListSize() { void CudaNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return; return;
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer();
interactionCount->download(pinnedInteractionCount); interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) maxTiles) if (pinnedInteractionCount[0] <= (unsigned int) maxTiles)
return; return;
......
...@@ -259,7 +259,6 @@ private: ...@@ -259,7 +259,6 @@ private:
CudaArray* interactionCount; CudaArray* interactionCount;
CudaArray* blockCenter; CudaArray* blockCenter;
CudaArray* blockBoundingBox; CudaArray* blockBoundingBox;
unsigned int* pinnedInteractionCount;
std::vector<void*> forceArgs, findBlockBoundsArgs, findInteractingBlocksArgs, findInteractionsWithinBlocksArgs; std::vector<void*> forceArgs, findBlockBoundsArgs, findInteractingBlocksArgs, findInteractionsWithinBlocksArgs;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
......
...@@ -432,6 +432,9 @@ void testLargeSystem() { ...@@ -432,6 +432,9 @@ void testLargeSystem() {
cuState = cuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy); cuState = cuContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy); referenceState = referenceContext.getState(State::Positions | State::Velocities | State::Forces | State::Energy);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
double dx = cuState.getPositions()[i][0]-referenceState.getPositions()[i][0];
double dy = cuState.getPositions()[i][1]-referenceState.getPositions()[i][1];
double dz = cuState.getPositions()[i][2]-referenceState.getPositions()[i][2];
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol); ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][0]-referenceState.getPositions()[i][0], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol); ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][1]-referenceState.getPositions()[i][1], boxSize), 0, tol);
ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol); ASSERT_EQUAL_TOL(fmod(cuState.getPositions()[i][2]-referenceState.getPositions()[i][2], boxSize), 0, tol);
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLArray.h"
#include <iostream>
#include <sstream>
#include <vector>
using namespace OpenMM;
/**
 * Create an OpenCLArray that allocates, and therefore owns, a new OpenCL Buffer.
 *
 * @param context      the context for which to create the array
 * @param size         the number of elements in the array
 * @param elementSize  the size of each element in bytes
 * @param name         the name of the array (used in error messages)
 * @param flags        the set of flags to specify when creating the OpenCL Buffer
 * @throws OpenMMException if the OpenCL Buffer cannot be created
 */
OpenCLArray::OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags) :
        context(context), size(size), elementSize(elementSize), name(name), ownsBuffer(true) {
    try {
        // Do the multiplication in std::size_t: size*elementSize evaluated as int
        // overflows once the array reaches 2 GB.
        const std::size_t numBytes = static_cast<std::size_t>(size)*elementSize;
        buffer = new cl::Buffer(context.getContext(), flags, numBytes);
    }
    catch (const cl::Error& err) {
        // Translate the OpenCL error into the exception type used by the rest of OpenMM.
        std::stringstream str;
        str<<"Error creating array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
        throw OpenMMException(str.str());
    }
}
/**
 * Create an OpenCLArray that wraps a preexisting OpenCL Buffer.
 * The array does not take ownership: the Buffer is not deleted by the destructor.
 *
 * @param context      the context for which to create the array
 * @param buffer       the OpenCL Buffer this object encapsulates
 * @param size         the number of elements in the array
 * @param elementSize  the size of each element in bytes
 * @param name         the name of the array
 */
OpenCLArray::OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name)
        : context(context),
          buffer(buffer),
          size(size),
          elementSize(elementSize),
          name(name),
          ownsBuffer(false) {
    // Nothing to allocate here; every member is set by the initializer list.
}
/**
 * Release the OpenCL Buffer, but only if this object allocated it itself.
 * A Buffer that was supplied by the caller is left untouched.
 */
OpenCLArray::~OpenCLArray() {
    if (!ownsBuffer)
        return;
    delete buffer;
}
void OpenCLArray::upload(const void* data, bool blocking) {
try {
context.getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error uploading array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
void OpenCLArray::download(void* data, bool blocking) const {
try {
context.getQueue().enqueueReadBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error downloading array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
/**
 * Copy the values in the Buffer to a second OpenCLArray.
 * The copy is enqueued on the context's command queue.
 *
 * @param dest  the destination array to copy to; it must have the same number
 *              of elements and the same element size as this array
 * @throws OpenMMException if the two arrays differ in size or element size, or
 *                         if the OpenCL copy fails
 */
void OpenCLArray::copyTo(OpenCLArray& dest) const {
    // Refuse mismatched arrays up front instead of copying a partial or oversized range.
    if (dest.getSize() != size || dest.getElementSize() != elementSize)
        throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array");
    try {
        // Do the multiplication in std::size_t: size*elementSize evaluated as int
        // overflows once the array reaches 2 GB.
        const std::size_t numBytes = static_cast<std::size_t>(size)*elementSize;
        context.getQueue().enqueueCopyBuffer(*buffer, dest.getDeviceBuffer(), 0, 0, numBytes);
    }
    catch (const cl::Error& err) {
        // Translate the OpenCL error into the exception type used by the rest of OpenMM.
        std::stringstream str;
        str<<"Error copying array "<<name<<" to "<<dest.getName()<<": "<<err.what()<<" ("<<err.err()<<")";
        throw OpenMMException(str.str());
    }
}
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009 Stanford University and the Authors. * * Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -37,62 +37,70 @@ namespace OpenMM { ...@@ -37,62 +37,70 @@ namespace OpenMM {
/** /**
* This class encapsulates an OpenCL Buffer. It provides a simplified API for working with it, * This class encapsulates an OpenCL Buffer. It provides a simplified API for working with it,
* an optionally includes a buffer in host memory for copying data to and from the OpenCL Buffer. * and for copying data to and from the OpenCL Buffer.
*/ */
template <class T>
class OpenCLArray { class OpenCLArray {
public: public:
/** /**
* Create an OpenCLArray object. * Create an OpenCLArray object. The object is allocated on the heap with the "new" operator.
* The template argument is the data type of each array element.
* *
* @param context the context for which to create the array * @param context the context for which to create the array
* @param size the number of elements in the array * @param size the number of elements in the array
* @param name the name of the array * @param name the name of the array
* @param createHostBuffer specifies whether to create a buffer in host memory for copying data to and from
* the OpenCL Buffer
* @param flags the set of flags to specify when creating the OpenCL Buffer * @param flags the set of flags to specify when creating the OpenCL Buffer
*/ */
OpenCLArray(OpenCLContext& context, int size, const std::string& name, bool createHostBuffer = false, cl_int flags = CL_MEM_READ_WRITE) : template <class T>
context(context), size(size), name(name), local(createHostBuffer ? size : 0), ownsBuffer(true) { static OpenCLArray* create(OpenCLContext& context, int size, const std::string& name, cl_int flags = CL_MEM_READ_WRITE) {
try { return new OpenCLArray(context, size, sizeof(T), name, flags);
buffer = new cl::Buffer(context.getContext(), flags, size*sizeof(T));
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error creating array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
} }
/** /**
* Create an OpenCLArray object the uses a preexisting Buffer. * Create an OpenCLArray object that uses a preexisting Buffer. The object is allocated on the heap with the "new" operator.
* The template argument is the data type of each array element.
* *
* @param context the context for which to create the array * @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates * @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array * @param size the number of elements in the array
* @param name the name of the array * @param name the name of the array
* @param createHostBuffer specifies whether to create a buffer in host memory for copying data to and from
* the OpenCL Buffer
*/ */
OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name, bool createHostBuffer = false) : template <class T>
context(context), buffer(buffer), size(size), name(name), local(createHostBuffer ? size : 0), ownsBuffer(false) { static OpenCLArray* create(OpenCLContext& context, cl::Buffer* buffer, int size, const std::string& name) {
} return new OpenCLArray(context, buffer, size, sizeof(T), name);
~OpenCLArray() {
if (ownsBuffer)
delete buffer;
}
const T& operator[](int index) const {
return local[index];
}
T& operator[](int index) {
return local[index];
} }
/**
* Create an OpenCLArray object.
*
* @param context the context for which to create the array
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
* @param flags the set of flags to specify when creating the OpenCL Buffer
*/
OpenCLArray(OpenCLContext& context, int size, int elementSize, const std::string& name, cl_int flags = CL_MEM_READ_WRITE);
/**
* Create an OpenCLArray object that uses a preexisting Buffer.
*
* @param context the context for which to create the array
* @param buffer the OpenCL Buffer this object encapsulates
* @param size the number of elements in the array
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
OpenCLArray(OpenCLContext& context, cl::Buffer* buffer, int size, int elementSize, const std::string& name);
~OpenCLArray();
/** /**
* Get the size of the array. * Get the size of the array.
*/ */
int getSize() const { int getSize() const {
return size; return size;
} }
/**
* Get the size of each element in bytes.
*/
int getElementSize() const {
return elementSize;
}
/** /**
* Get the name of the array. * Get the name of the array.
*/ */
...@@ -105,85 +113,50 @@ public: ...@@ -105,85 +113,50 @@ public:
cl::Buffer& getDeviceBuffer() { cl::Buffer& getDeviceBuffer() {
return *buffer; return *buffer;
} }
/**
* Get a pointer to the host buffer.
*/
T* getHostBuffer() {
return &local[0];
}
/**
* Get an element of the host buffer.
*/
const T& get(int index) const {
return local[index];
}
/**
* Set an element of the host buffer.
*/
void set(int index, const T& value) {
local[index] = value;
}
/** /**
* Copy the values in a vector to the Buffer. * Copy the values in a vector to the Buffer.
*/ */
void upload(std::vector<T>& data, bool blocking = true) { template <class T>
void upload(const std::vector<T>& data, bool blocking = true) {
if (sizeof(T) != elementSize || data.size() != size)
throw OpenMMException("Error uploading array "+name+": The specified vector does not match the size of the array");
upload(&data[0], blocking); upload(&data[0], blocking);
} }
/** /**
* Copy the values in the Buffer to a vector. * Copy the values in the Buffer to a vector.
*/ */
void download(std::vector<T>& data) const { template <class T>
void download(std::vector<T>& data, bool blocking = true) const {
if (sizeof(T) != elementSize)
throw OpenMMException("Error downloading array "+name+": The specified vector has the wrong element size");
if (data.size() != size) if (data.size() != size)
data.resize(size); data.resize(size);
download(&data[0]); download(&data[0], blocking);
} }
/** /**
* Copy the values in an array to the Buffer. * Copy the values in an array to the Buffer.
*
* @param data the data to copy
* @param blocking if true, this call will block until the transfer is complete.
*/ */
void upload(T* data, bool blocking = true) { void upload(const void* data, bool blocking = true);
try {
context.getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*sizeof(T), data);
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error uploading array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
/** /**
* Copy the values in the Buffer to an array. * Copy the values in the Buffer to an array.
*
* @param data the array to copy the memory to
* @param blocking if true, this call will block until the transfer is complete.
*/ */
void download(T* data) const { void download(void* data, bool blocking = true) const;
try {
context.getQueue().enqueueReadBuffer(*buffer, CL_TRUE, 0, size*sizeof(T), data);
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error downloading array "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
/**
* Copy the values in the host buffer to the OpenCL Buffer.
*/
void upload(bool blocking = true) {
if (local.size() == 0)
throw OpenMMException(name+": Called upload() on an OpenCLArray with no host buffer");
upload(local, blocking);
}
/** /**
* Copy the values in the Buffer to the host buffer. * Copy the values in the Buffer to a second OpenCLArray.
*
* @param dest the destination array to copy to
*/ */
void download() { void copyTo(OpenCLArray& dest) const;
if (local.size() == 0)
throw OpenMMException(name+": Called download() on an OpenCLArray with no host buffer");
download(local);
}
private: private:
OpenCLContext& context; OpenCLContext& context;
cl::Buffer* buffer; cl::Buffer* buffer;
std::vector<T> local; int size, elementSize;
int size;
bool ownsBuffer; bool ownsBuffer;
std::string name; std::string name;
}; };
......
...@@ -87,7 +87,7 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -87,7 +87,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int atom = 0; atom < numAtoms; atom++) for (int atom = 0; atom < numAtoms; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][atom]; indexVec[bond*width+atom] = forceAtoms[i][bond][atom];
} }
OpenCLArray<cl_uint>* indices = new OpenCLArray<cl_uint>(context, indexVec.size(), "bondedIndices"); OpenCLArray* indices = OpenCLArray::create<cl_uint>(context, indexVec.size(), "bondedIndices");
indices->upload(indexVec); indices->upload(indexVec);
atomIndices.push_back(indices); atomIndices.push_back(indices);
bufferVec[i].resize(width*numBonds, 0); bufferVec[i].resize(width*numBonds, 0);
...@@ -151,7 +151,7 @@ void OpenCLBondedUtilities::initialize(const System& system) { ...@@ -151,7 +151,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
for (int bond = 0; bond < numBonds; bond++) for (int bond = 0; bond < numBonds; bond++)
for (int atom = 0; atom < numAtoms; atom++) for (int atom = 0; atom < numAtoms; atom++)
bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]]; bufferVec[force][bond*width+atom] += bufferCounter[forceSets[i][k]][forceAtoms[force][bond][atom]];
OpenCLArray<cl_uint>* buffers = new OpenCLArray<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices"); OpenCLArray* buffers = OpenCLArray::create<cl_uint>(context, bufferVec[force].size(), "bondedBufferIndices");
buffers->upload(bufferVec[force]); buffers->upload(bufferVec[force]);
bufferIndices[force] = buffers; bufferIndices[force] = buffers;
} }
......
...@@ -134,8 +134,8 @@ private: ...@@ -134,8 +134,8 @@ private:
std::vector<std::vector<int> > forceSets; std::vector<std::vector<int> > forceSets;
std::vector<cl::Memory*> arguments; std::vector<cl::Memory*> arguments;
std::vector<std::string> argTypes; std::vector<std::string> argTypes;
std::vector<OpenCLArray<cl_uint>*> atomIndices; std::vector<OpenCLArray*> atomIndices;
std::vector<OpenCLArray<cl_uint>*> bufferIndices; std::vector<OpenCLArray*> bufferIndices;
std::vector<std::string> prefixCode; std::vector<std::string> prefixCode;
int numForceBuffers, maxBonds; int numForceBuffers, maxBonds;
bool hasInitializedKernels; bool hasInitializedKernels;
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
using namespace OpenMM; using namespace OpenMM;
OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context), dgBlockCounts(NULL) { OpenCLCompact::OpenCLCompact(OpenCLContext& context) : context(context), dgBlockCounts(NULL) {
dgBlockCounts = new OpenCLArray<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts"); dgBlockCounts = OpenCLArray::create<cl_uint>(context, context.getNumThreadBlocks(), "dgBlockCounts");
cl::Program program = context.createProgram(OpenCLKernelSources::compact); cl::Program program = context.createProgram(OpenCLKernelSources::compact);
countKernel = cl::Kernel(program, "countElts"); countKernel = cl::Kernel(program, "countElts");
moveValidKernel = cl::Kernel(program, "moveValidElementsStaged"); moveValidKernel = cl::Kernel(program, "moveValidElementsStaged");
...@@ -41,7 +41,7 @@ OpenCLCompact::~OpenCLCompact() { ...@@ -41,7 +41,7 @@ OpenCLCompact::~OpenCLCompact() {
delete dgBlockCounts; delete dgBlockCounts;
} }
void OpenCLCompact::compactStream(OpenCLArray<cl_uint>& dOut, OpenCLArray<cl_uint>& dIn, OpenCLArray<cl_uint>& dValid, OpenCLArray<cl_uint>& numValid) { void OpenCLCompact::compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid) {
// Figure out # elements per block // Figure out # elements per block
unsigned int len = dIn.getSize(); unsigned int len = dIn.getSize();
unsigned int numBlocks = context.getNumThreadBlocks(); unsigned int numBlocks = context.getNumThreadBlocks();
......
...@@ -33,10 +33,10 @@ class OPENMM_EXPORT OpenCLCompact { ...@@ -33,10 +33,10 @@ class OPENMM_EXPORT OpenCLCompact {
public: public:
OpenCLCompact(OpenCLContext& context); OpenCLCompact(OpenCLContext& context);
~OpenCLCompact(); ~OpenCLCompact();
void compactStream(OpenCLArray<cl_uint>& dOut, OpenCLArray<cl_uint>& dIn, OpenCLArray<cl_uint>& dValid, OpenCLArray<cl_uint>& numValid); void compactStream(OpenCLArray& dOut, OpenCLArray& dIn, OpenCLArray& dValid, OpenCLArray& numValid);
private: private:
OpenCLContext& context; OpenCLContext& context;
OpenCLArray<cl_uint>* dgBlockCounts; OpenCLArray* dgBlockCounts;
cl::Kernel countKernel; cl::Kernel countKernel;
cl::Kernel moveValidKernel; cl::Kernel moveValidKernel;
}; };
......
...@@ -67,7 +67,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i ...@@ -67,7 +67,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, OpenCLPlatform::PlatformData& platformData) : OpenCLContext::OpenCLContext(const System& system, int platformIndex, int deviceIndex, OpenCLPlatform::PlatformData& platformData) :
system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL), system(system), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), atomsWereReordered(false), posq(NULL),
velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL), velm(NULL), forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL),
bonded(NULL), nonbonded(NULL), thread(NULL) { bonded(NULL), nonbonded(NULL), thread(NULL) {
try { try {
contextIndex = platformData.contexts.size(); contextIndex = platformData.contexts.size();
...@@ -217,8 +217,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -217,8 +217,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); numThreadBlocks = numThreadBlocksPerComputeUnit*device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
bonded = new OpenCLBondedUtilities(*this); bonded = new OpenCLBondedUtilities(*this);
nonbonded = new OpenCLNonbondedUtilities(*this); nonbonded = new OpenCLNonbondedUtilities(*this);
posq = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "posq", true); posq = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms, "posq");
velm = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "velm", true); velm = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms, "velm");
posCellOffsets.resize(paddedNumAtoms, mm_int4(0, 0, 0, 0)); posCellOffsets.resize(paddedNumAtoms, mm_int4(0, 0, 0, 0));
} }
catch (cl::Error err) { catch (cl::Error err) {
...@@ -242,19 +242,20 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -242,19 +242,20 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use. // Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
cl::Kernel accuracyKernel(utilities, "determineNativeAccuracy"); cl::Kernel accuracyKernel(utilities, "determineNativeAccuracy");
OpenCLArray<mm_float8> values(*this, 20, "values", true); OpenCLArray valuesArray(*this, 20, sizeof(mm_float8), "values");
vector<mm_float8> values(valuesArray.getSize());
float nextValue = 1e-4f; float nextValue = 1e-4f;
for (int i = 0; i < values.getSize(); ++i) { for (int i = 0; i < (int) values.size(); ++i) {
values[i].s0 = nextValue; values[i].s0 = nextValue;
nextValue *= (float) M_PI; nextValue *= (float) M_PI;
} }
values.upload(); valuesArray.upload(values);
accuracyKernel.setArg<cl::Buffer>(0, values.getDeviceBuffer()); accuracyKernel.setArg<cl::Buffer>(0, valuesArray.getDeviceBuffer());
accuracyKernel.setArg<cl_int>(1, values.getSize()); accuracyKernel.setArg<cl_int>(1, values.size());
executeKernel(accuracyKernel, values.getSize()); executeKernel(accuracyKernel, values.size());
values.download(); valuesArray.download(values);
double maxSqrtError = 0.0, maxRsqrtError = 0.0, maxRecipError = 0.0, maxExpError = 0.0, maxLogError = 0.0; double maxSqrtError = 0.0, maxRsqrtError = 0.0, maxRecipError = 0.0, maxExpError = 0.0, maxLogError = 0.0;
for (int i = 0; i < values.getSize(); ++i) { for (int i = 0; i < (int) values.size(); ++i) {
double v = values[i].s0; double v = values[i].s0;
double correctSqrt = sqrt(v); double correctSqrt = sqrt(v);
maxSqrtError = max(maxSqrtError, fabs(correctSqrt-values[i].s1)/correctSqrt); maxSqrtError = max(maxSqrtError, fabs(correctSqrt-values[i].s1)/correctSqrt);
...@@ -283,6 +284,8 @@ OpenCLContext::~OpenCLContext() { ...@@ -283,6 +284,8 @@ OpenCLContext::~OpenCLContext() {
delete forces[i]; delete forces[i];
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
delete reorderListeners[i]; delete reorderListeners[i];
if (pinnedBuffer != NULL)
delete pinnedBuffer;
if (posq != NULL) if (posq != NULL)
delete posq; delete posq;
if (velm != NULL) if (velm != NULL)
...@@ -295,8 +298,8 @@ OpenCLContext::~OpenCLContext() { ...@@ -295,8 +298,8 @@ OpenCLContext::~OpenCLContext() {
delete longForceBuffer; delete longForceBuffer;
if (energyBuffer != NULL) if (energyBuffer != NULL)
delete energyBuffer; delete energyBuffer;
if (atomIndex != NULL) if (atomIndexDevice != NULL)
delete atomIndex; delete atomIndexDevice;
if (integration != NULL) if (integration != NULL)
delete integration; delete integration;
if (bonded != NULL) if (bonded != NULL)
...@@ -308,19 +311,20 @@ OpenCLContext::~OpenCLContext() { ...@@ -308,19 +311,20 @@ OpenCLContext::~OpenCLContext() {
} }
void OpenCLContext::initialize() { void OpenCLContext::initialize() {
vector<mm_float4> v(paddedNumAtoms, mm_float4(0, 0, 0, 0));
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i); double mass = system.getParticleMass(i);
(*velm)[i].w = (float) (mass == 0.0 ? 0.0 : 1.0/mass); v[i].w = (float) (mass == 0.0 ? 0.0 : 1.0/mass);
} }
velm->upload(); velm->upload(v);
bonded->initialize(system); bonded->initialize(system);
numForceBuffers = platformData.contexts.size(); numForceBuffers = platformData.contexts.size();
numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers()); numForceBuffers = std::max(numForceBuffers, bonded->getNumForceBuffers());
for (int i = 0; i < (int) forces.size(); i++) for (int i = 0; i < (int) forces.size(); i++)
numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers()); numForceBuffers = std::max(numForceBuffers, forces[i]->getRequiredForceBuffers());
forceBuffers = new OpenCLArray<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers", false); forceBuffers = OpenCLArray::create<mm_float4>(*this, paddedNumAtoms*numForceBuffers, "forceBuffers");
if (supports64BitGlobalAtomics) { if (supports64BitGlobalAtomics) {
longForceBuffer = new OpenCLArray<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer", false); longForceBuffer = OpenCLArray::create<cl_long>(*this, 3*paddedNumAtoms, "longForceBuffer");
reduceForcesKernel.setArg<cl::Buffer>(0, longForceBuffer->getDeviceBuffer()); reduceForcesKernel.setArg<cl::Buffer>(0, longForceBuffer->getDeviceBuffer());
reduceForcesKernel.setArg<cl::Buffer>(1, forceBuffers->getDeviceBuffer()); reduceForcesKernel.setArg<cl::Buffer>(1, forceBuffers->getDeviceBuffer());
reduceForcesKernel.setArg<cl_int>(2, paddedNumAtoms); reduceForcesKernel.setArg<cl_int>(2, paddedNumAtoms);
...@@ -328,13 +332,17 @@ void OpenCLContext::initialize() { ...@@ -328,13 +332,17 @@ void OpenCLContext::initialize() {
addAutoclearBuffer(longForceBuffer->getDeviceBuffer(), longForceBuffer->getSize()*2); addAutoclearBuffer(longForceBuffer->getDeviceBuffer(), longForceBuffer->getSize()*2);
} }
addAutoclearBuffer(forceBuffers->getDeviceBuffer(), forceBuffers->getSize()*4); addAutoclearBuffer(forceBuffers->getDeviceBuffer(), forceBuffers->getSize()*4);
force = new OpenCLArray<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force", true); force = OpenCLArray::create<mm_float4>(*this, &forceBuffers->getDeviceBuffer(), paddedNumAtoms, "force");
energyBuffer = new OpenCLArray<cl_float>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer", true); energyBuffer = OpenCLArray::create<cl_float>(*this, max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()), "energyBuffer");
addAutoclearBuffer(energyBuffer->getDeviceBuffer(), energyBuffer->getSize()); addAutoclearBuffer(energyBuffer->getDeviceBuffer(), energyBuffer->getSize());
atomIndex = new OpenCLArray<cl_int>(*this, paddedNumAtoms, "atomIndex", true); int bufferBytes = max(posq->getSize()*sizeof(mm_float4), energyBuffer->getSize()*sizeof(cl_float));
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = queue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
atomIndexDevice = OpenCLArray::create<cl_int>(*this, paddedNumAtoms, "atomIndexDevice");
atomIndex.resize(paddedNumAtoms);
for (int i = 0; i < paddedNumAtoms; ++i) for (int i = 0; i < paddedNumAtoms; ++i)
(*atomIndex)[i] = i; atomIndex[i] = i;
atomIndex->upload(); atomIndexDevice->upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
moleculesInvalid = false; moleculesInvalid = false;
nonbonded->initialize(system); nonbonded->initialize(system);
...@@ -410,12 +418,8 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi ...@@ -410,12 +418,8 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
} }
} }
void OpenCLContext::clearBuffer(OpenCLArray<float>& array) { void OpenCLContext::clearBuffer(OpenCLArray& array) {
clearBuffer(array.getDeviceBuffer(), array.getSize()); clearBuffer(array.getDeviceBuffer(), array.getSize()*array.getElementSize()/sizeof(cl_float));
}
void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
clearBuffer(array.getDeviceBuffer(), array.getSize()*4);
} }
void OpenCLContext::clearBuffer(cl::Memory& memory, int size) { void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
...@@ -500,7 +504,7 @@ void OpenCLContext::reduceForces() { ...@@ -500,7 +504,7 @@ void OpenCLContext::reduceForces() {
reduceBuffer(*forceBuffers, numForceBuffers); reduceBuffer(*forceBuffers, numForceBuffers);
} }
void OpenCLContext::reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers) { void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
int bufferSize = array.getSize()/numBuffers; int bufferSize = array.getSize()/numBuffers;
reduceFloat4Kernel.setArg<cl::Buffer>(0, array.getDeviceBuffer()); reduceFloat4Kernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
reduceFloat4Kernel.setArg<cl_int>(1, bufferSize); reduceFloat4Kernel.setArg<cl_int>(1, bufferSize);
...@@ -760,26 +764,28 @@ void OpenCLContext::validateMolecules() { ...@@ -760,26 +764,28 @@ void OpenCLContext::validateMolecules() {
// atoms to their original order, rebuild the list of identical molecules, and sort them // atoms to their original order, rebuild the list of identical molecules, and sort them
// again. // again.
vector<mm_float4> newPosq(numAtoms); vector<mm_float4> oldPosq(paddedNumAtoms);
vector<mm_float4> newVelm(numAtoms); vector<mm_float4> newPosq(paddedNumAtoms);
vector<mm_float4> oldVelm(paddedNumAtoms);
vector<mm_float4> newVelm(paddedNumAtoms);
vector<mm_int4> newCellOffsets(numAtoms); vector<mm_int4> newCellOffsets(numAtoms);
posq->download(); posq->download(oldPosq);
velm->download(); velm->download(oldVelm);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
int index = atomIndex->get(i); int index = atomIndex[i];
newPosq[index] = posq->get(i); newPosq[index] = oldPosq[i];
newVelm[index] = velm->get(i); newVelm[index] = oldVelm[i];
newCellOffsets[index] = posCellOffsets[i]; newCellOffsets[index] = posCellOffsets[i];
} }
posq->upload(newPosq);
velm->upload(newVelm);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
posq->set(i, newPosq[i]); atomIndex[i] = i;
velm->set(i, newVelm[i]);
atomIndex->set(i, i);
posCellOffsets[i] = newCellOffsets[i]; posCellOffsets[i] = newCellOffsets[i];
} }
posq->upload(); posq->upload(newPosq);
velm->upload(); velm->upload(newVelm);
atomIndex->upload(); atomIndexDevice->upload(atomIndex);
findMoleculeGroups(); findMoleculeGroups();
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
...@@ -794,11 +800,13 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) { ...@@ -794,11 +800,13 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
// Find the range of positions and the number of bins along each axis. // Find the range of positions and the number of bins along each axis.
posq->download(); vector<mm_float4> oldPosq(paddedNumAtoms);
velm->download(); vector<mm_float4> oldVelm(paddedNumAtoms);
float minx = posq->get(0).x, maxx = posq->get(0).x; posq->download(oldPosq);
float miny = posq->get(0).y, maxy = posq->get(0).y; velm->download(oldVelm);
float minz = posq->get(0).z, maxz = posq->get(0).z; float minx = oldPosq[0].x, maxx = oldPosq[0].x;
float miny = oldPosq[0].y, maxy = oldPosq[0].y;
float minz = oldPosq[0].z, maxz = oldPosq[0].z;
if (nonbonded->getUsePeriodic()) { if (nonbonded->getUsePeriodic()) {
minx = miny = minz = 0.0; minx = miny = minz = 0.0;
maxx = periodicBoxSize.x; maxx = periodicBoxSize.x;
...@@ -807,7 +815,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) { ...@@ -807,7 +815,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
} }
else { else {
for (int i = 1; i < numAtoms; i++) { for (int i = 1; i < numAtoms; i++) {
const mm_float4& pos = posq->get(i); const mm_float4& pos = oldPosq[i];
minx = min(minx, pos.x); minx = min(minx, pos.x);
maxx = max(maxx, pos.x); maxx = max(maxx, pos.x);
miny = min(miny, pos.y); miny = min(miny, pos.y);
...@@ -820,8 +828,8 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) { ...@@ -820,8 +828,8 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
// Loop over each group of identical molecules and reorder them. // Loop over each group of identical molecules and reorder them.
vector<int> originalIndex(numAtoms); vector<int> originalIndex(numAtoms);
vector<mm_float4> newPosq(numAtoms); vector<mm_float4> newPosq(paddedNumAtoms);
vector<mm_float4> newVelm(numAtoms); vector<mm_float4> newVelm(paddedNumAtoms);
vector<mm_int4> newCellOffsets(numAtoms); vector<mm_int4> newCellOffsets(numAtoms);
for (int group = 0; group < (int) moleculeGroups.size(); group++) { for (int group = 0; group < (int) moleculeGroups.size(); group++) {
// Find the center of each molecule. // Find the center of each molecule.
...@@ -837,7 +845,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) { ...@@ -837,7 +845,7 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
molPos[i].z = 0.0f; molPos[i].z = 0.0f;
for (int j = 0; j < (int)atoms.size(); j++) { for (int j = 0; j < (int)atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i]; int atom = atoms[j]+mol.offsets[i];
const mm_float4& pos = posq->get(atom); const mm_float4& pos = oldPosq[atom];
molPos[i].x += pos.x; molPos[i].x += pos.x;
molPos[i].y += pos.y; molPos[i].y += pos.y;
molPos[i].z += pos.z; molPos[i].z += pos.z;
...@@ -863,11 +871,11 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) { ...@@ -863,11 +871,11 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
if (enforcePeriodic) { if (enforcePeriodic) {
for (int j = 0; j < (int) atoms.size(); j++) { for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.offsets[i]; int atom = atoms[j]+mol.offsets[i];
mm_float4 p = posq->get(atom); mm_float4 p = oldPosq[atom];
p.x -= dx; p.x -= dx;
p.y -= dy; p.y -= dy;
p.z -= dz; p.z -= dz;
posq->set(atom, p); oldPosq[atom] = p;
posCellOffsets[atom].x -= xcell; posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].y -= ycell; posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].z -= zcell; posCellOffsets[atom].z -= zcell;
...@@ -918,9 +926,9 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) { ...@@ -918,9 +926,9 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
for (int j = 0; j < (int)atoms.size(); j++) { for (int j = 0; j < (int)atoms.size(); j++) {
int oldIndex = mol.offsets[molBins[i].second]+atoms[j]; int oldIndex = mol.offsets[molBins[i].second]+atoms[j];
int newIndex = mol.offsets[i]+atoms[j]; int newIndex = mol.offsets[i]+atoms[j];
originalIndex[newIndex] = atomIndex->get(oldIndex); originalIndex[newIndex] = atomIndex[oldIndex];
newPosq[newIndex] = posq->get(oldIndex); newPosq[newIndex] = oldPosq[oldIndex];
newVelm[newIndex] = velm->get(oldIndex); newVelm[newIndex] = oldVelm[oldIndex];
newCellOffsets[newIndex] = posCellOffsets[oldIndex]; newCellOffsets[newIndex] = posCellOffsets[oldIndex];
} }
} }
...@@ -929,14 +937,12 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) { ...@@ -929,14 +937,12 @@ void OpenCLContext::reorderAtoms(bool enforcePeriodic) {
// Update the streams. // Update the streams.
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
posq->set(i, newPosq[i]); atomIndex[i] = originalIndex[i];
velm->set(i, newVelm[i]);
atomIndex->set(i, originalIndex[i]);
posCellOffsets[i] = newCellOffsets[i]; posCellOffsets[i] = newCellOffsets[i];
} }
posq->upload(); posq->upload(newPosq);
velm->upload(); velm->upload(newVelm);
atomIndex->upload(); atomIndexDevice->upload(atomIndex);
for (int i = 0; i < (int) reorderListeners.size(); i++) for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute(); reorderListeners[i]->execute();
} }
......
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
namespace OpenMM { namespace OpenMM {
template <class T>
class OpenCLArray; class OpenCLArray;
class OpenCLForceInfo; class OpenCLForceInfo;
class OpenCLIntegrationUtilities; class OpenCLIntegrationUtilities;
...@@ -196,44 +195,57 @@ public: ...@@ -196,44 +195,57 @@ public:
/** /**
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom. * Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/ */
OpenCLArray<mm_float4>& getPosq() { OpenCLArray& getPosq() {
return *posq; return *posq;
} }
/** /**
* Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom. * Get the array which contains the velocity (the xyz components) and inverse mass (the w component) of each atom.
*/ */
OpenCLArray<mm_float4>& getVelm() { OpenCLArray& getVelm() {
return *velm; return *velm;
} }
/** /**
* Get the array which contains the force on each atom. * Get the array which contains the force on each atom.
*/ */
OpenCLArray<mm_float4>& getForce() { OpenCLArray& getForce() {
return *force; return *force;
} }
/** /**
* Get the array which contains the buffers in which forces are computed. * Get the array which contains the buffers in which forces are computed.
*/ */
OpenCLArray<mm_float4>& getForceBuffers() { OpenCLArray& getForceBuffers() {
return *forceBuffers; return *forceBuffers;
} }
/** /**
* Get the array which contains a contribution to each force represented as 64 bit fixed point. * Get the array which contains a contribution to each force represented as 64 bit fixed point.
*/ */
OpenCLArray<cl_long>& getLongForceBuffer() { OpenCLArray& getLongForceBuffer() {
return *longForceBuffer; return *longForceBuffer;
} }
/** /**
* Get the array which contains the buffer in which energy is computed. * Get the array which contains the buffer in which energy is computed.
*/ */
OpenCLArray<cl_float>& getEnergyBuffer() { OpenCLArray& getEnergyBuffer() {
return *energyBuffer; return *energyBuffer;
} }
/**
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
*/
void* getPinnedBuffer() {
return pinnedMemory;
}
/**
* Get the host-side vector which contains the index of each atom.
*/
const std::vector<int>& getAtomIndex() const {
return atomIndex;
}
/** /**
* Get the array which contains the index of each atom. * Get the array which contains the index of each atom.
*/ */
OpenCLArray<cl_int>& getAtomIndex() { OpenCLArray& getAtomIndexArray() {
return *atomIndex; return *atomIndexDevice;
} }
/** /**
* Get the number of cells by which the positions are offset. * Get the number of cells by which the positions are offset.
...@@ -277,11 +289,7 @@ public: ...@@ -277,11 +289,7 @@ public:
/** /**
* Set all elements of an array to 0. * Set all elements of an array to 0.
*/ */
void clearBuffer(OpenCLArray<float>& array); void clearBuffer(OpenCLArray& array);
/**
* Set all elements of an array to 0.
*/
void clearBuffer(OpenCLArray<mm_float4>& array);
/** /**
* Set all elements of an array to 0. * Set all elements of an array to 0.
* *
...@@ -307,7 +315,7 @@ public: ...@@ -307,7 +315,7 @@ public:
* @param array the array containing the buffers to reduce * @param array the array containing the buffers to reduce
* @param numBuffers the number of buffers packed into the array * @param numBuffers the number of buffers packed into the array
*/ */
void reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers); void reduceBuffer(OpenCLArray& array, int numBuffers);
/** /**
* Sum the buffesr containing forces. * Sum the buffesr containing forces.
*/ */
...@@ -527,13 +535,16 @@ private: ...@@ -527,13 +535,16 @@ private:
std::vector<Molecule> molecules; std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups; std::vector<MoleculeGroup> moleculeGroups;
std::vector<mm_int4> posCellOffsets; std::vector<mm_int4> posCellOffsets;
OpenCLArray<mm_float4>* posq; cl::Buffer* pinnedBuffer;
OpenCLArray<mm_float4>* velm; void* pinnedMemory;
OpenCLArray<mm_float4>* force; OpenCLArray* posq;
OpenCLArray<mm_float4>* forceBuffers; OpenCLArray* velm;
OpenCLArray<cl_long>* longForceBuffer; OpenCLArray* force;
OpenCLArray<cl_float>* energyBuffer; OpenCLArray* forceBuffers;
OpenCLArray<cl_int>* atomIndex; OpenCLArray* longForceBuffer;
OpenCLArray* energyBuffer;
OpenCLArray* atomIndexDevice;
std::vector<int> atomIndex;
std::vector<cl::Memory*> autoclearBuffers; std::vector<cl::Memory*> autoclearBuffers;
std::vector<int> autoclearBufferSizes; std::vector<int> autoclearBufferSizes;
std::vector<ReorderListener*> reorderListeners; std::vector<ReorderListener*> reorderListeners;
......
...@@ -41,7 +41,7 @@ OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize ...@@ -41,7 +41,7 @@ OpenCLFFT3D::OpenCLFFT3D(OpenCLContext& context, int xsize, int ysize, int zsize
ykernel = createKernel(zsize, xsize, ysize); ykernel = createKernel(zsize, xsize, ysize);
} }
void OpenCLFFT3D::execFFT(OpenCLArray<mm_float2>& in, OpenCLArray<mm_float2>& out, bool forward) { void OpenCLFFT3D::execFFT(OpenCLArray& in, OpenCLArray& out, bool forward) {
int maxSize = xkernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()); int maxSize = xkernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice());
if (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU) if (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU)
maxSize = 1; maxSize = 1;
......
...@@ -72,7 +72,7 @@ public: ...@@ -72,7 +72,7 @@ public:
* @param out on exit, this contains the transformed data * @param out on exit, this contains the transformed data
* @param forward true to perform a forward transform, false to perform an inverse transform * @param forward true to perform a forward transform, false to perform an inverse transform
*/ */
void execFFT(OpenCLArray<mm_float2>& in, OpenCLArray<mm_float2>& out, bool forward = true); void execFFT(OpenCLArray& in, OpenCLArray& out, bool forward = true);
/** /**
* Get the smallest legal size for a dimension of the grid (that is, a size with no prime * Get the smallest legal size for a dimension of the grid (that is, a size with no prime
* factors other than 2, 3, and 5). * factors other than 2, 3, and 5).
......
...@@ -96,12 +96,12 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -96,12 +96,12 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false) { vsiteOutOfPlaneAtoms(NULL), vsiteOutOfPlaneWeights(NULL), hasInitializedPosConstraintKernels(false), hasInitializedVelConstraintKernels(false) {
// Create workspace arrays. // Create workspace arrays.
posDelta = new OpenCLArray<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta"); posDelta = OpenCLArray::create<mm_float4>(context, context.getPaddedNumAtoms(), "posDelta");
vector<mm_float4> deltas(posDelta->getSize(), mm_float4(0.0, 0.0, 0.0, 0.0)); vector<mm_float4> deltas(posDelta->getSize(), mm_float4(0.0, 0.0, 0.0, 0.0));
posDelta->upload(deltas); posDelta->upload(deltas);
stepSize = new OpenCLArray<mm_float2>(context, 1, "stepSize", true); stepSize = OpenCLArray::create<mm_float2>(context, 1, "stepSize");
stepSize->set(0, mm_float2(0.0f, 0.0f)); vector<mm_float2> step(1, mm_float2(0.0f, 0.0f));
stepSize->upload(); stepSize->upload(step);
// Create kernels for enforcing constraints. // Create kernels for enforcing constraints.
...@@ -192,8 +192,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -192,8 +192,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
isShakeAtom[atom2] = true; isShakeAtom[atom2] = true;
isShakeAtom[atom3] = true; isShakeAtom[atom3] = true;
} }
settleAtoms = new OpenCLArray<mm_int4>(context, atoms.size(), "settleAtoms"); settleAtoms = OpenCLArray::create<mm_int4>(context, atoms.size(), "settleAtoms");
settleParams = new OpenCLArray<mm_float2>(context, params.size(), "settleParams"); settleParams = OpenCLArray::create<mm_float2>(context, params.size(), "settleParams");
settleAtoms->upload(atoms); settleAtoms->upload(atoms);
settleParams->upload(params); settleParams->upload(params);
} }
...@@ -274,8 +274,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -274,8 +274,8 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
isShakeAtom[cluster.peripheralID[2]] = true; isShakeAtom[cluster.peripheralID[2]] = true;
++index; ++index;
} }
shakeAtoms = new OpenCLArray<mm_int4>(context, atoms.size(), "shakeAtoms"); shakeAtoms = OpenCLArray::create<mm_int4>(context, atoms.size(), "shakeAtoms");
shakeParams = new OpenCLArray<mm_float4>(context, params.size(), "shakeParams"); shakeParams = OpenCLArray::create<mm_float4>(context, params.size(), "shakeParams");
shakeAtoms->upload(atoms); shakeAtoms->upload(atoms);
shakeParams->upload(params); shakeParams->upload(params);
} }
...@@ -457,18 +457,18 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -457,18 +457,18 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
// Record the CCMA data structures. // Record the CCMA data structures.
ccmaAtoms = new OpenCLArray<mm_int2>(context, numCCMA, "CcmaAtoms"); ccmaAtoms = OpenCLArray::create<mm_int2>(context, numCCMA, "CcmaAtoms");
ccmaDistance = new OpenCLArray<mm_float4>(context, numCCMA, "CcmaDistance"); ccmaDistance = OpenCLArray::create<mm_float4>(context, numCCMA, "CcmaDistance");
ccmaAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints"); ccmaAtomConstraints = OpenCLArray::create<cl_int>(context, numAtoms*maxAtomConstraints, "CcmaAtomConstraints");
ccmaNumAtomConstraints = new OpenCLArray<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex"); ccmaNumAtomConstraints = OpenCLArray::create<cl_int>(context, numAtoms, "CcmaAtomConstraintsIndex");
ccmaDelta1 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta1"); ccmaDelta1 = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaDelta1");
ccmaDelta2 = new OpenCLArray<cl_float>(context, numCCMA, "CcmaDelta2"); ccmaDelta2 = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaDelta2");
ccmaConverged = new OpenCLArray<cl_int>(context, 2, "CcmaConverged"); ccmaConverged = OpenCLArray::create<cl_int>(context, 2, "CcmaConverged");
ccmaConvergedBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, 2*sizeof(cl_int)); ccmaConvergedBuffer = new cl::Buffer(context.getContext(), CL_MEM_ALLOC_HOST_PTR, 2*sizeof(cl_int));
ccmaConvergedMemory = (cl_int*) context.getQueue().enqueueMapBuffer(*ccmaConvergedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, 2*sizeof(cl_int)); ccmaConvergedMemory = (cl_int*) context.getQueue().enqueueMapBuffer(*ccmaConvergedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, 2*sizeof(cl_int));
ccmaReducedMass = new OpenCLArray<cl_float>(context, numCCMA, "CcmaReducedMass"); ccmaReducedMass = OpenCLArray::create<cl_float>(context, numCCMA, "CcmaReducedMass");
ccmaConstraintMatrixColumn = new OpenCLArray<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn"); ccmaConstraintMatrixColumn = OpenCLArray::create<cl_int>(context, numCCMA*maxRowElements, "ConstraintMatrixColumn");
ccmaConstraintMatrixValue = new OpenCLArray<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue"); ccmaConstraintMatrixValue = OpenCLArray::create<cl_float>(context, numCCMA*maxRowElements, "ConstraintMatrixValue");
vector<mm_int2> atomsVec(ccmaAtoms->getSize()); vector<mm_int2> atomsVec(ccmaAtoms->getSize());
vector<mm_float4> distanceVec(ccmaDistance->getSize()); vector<mm_float4> distanceVec(ccmaDistance->getSize());
vector<cl_int> atomConstraintsVec(ccmaAtomConstraints->getSize()); vector<cl_int> atomConstraintsVec(ccmaAtomConstraints->getSize());
...@@ -556,12 +556,12 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c ...@@ -556,12 +556,12 @@ OpenCLIntegrationUtilities::OpenCLIntegrationUtilities(OpenCLContext& context, c
int num2Avg = vsite2AvgAtomVec.size(); int num2Avg = vsite2AvgAtomVec.size();
int num3Avg = vsite3AvgAtomVec.size(); int num3Avg = vsite3AvgAtomVec.size();
int numOutOfPlane = vsiteOutOfPlaneAtomVec.size(); int numOutOfPlane = vsiteOutOfPlaneAtomVec.size();
vsite2AvgAtoms = new OpenCLArray<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms"); vsite2AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num2Avg), "vsite2AvgAtoms");
vsite2AvgWeights = new OpenCLArray<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights"); vsite2AvgWeights = OpenCLArray::create<mm_float2>(context, max(1, num2Avg), "vsite2AvgWeights");
vsite3AvgAtoms = new OpenCLArray<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms"); vsite3AvgAtoms = OpenCLArray::create<mm_int4>(context, max(1, num3Avg), "vsite3AvgAtoms");
vsite3AvgWeights = new OpenCLArray<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights"); vsite3AvgWeights = OpenCLArray::create<mm_float4>(context, max(1, num3Avg), "vsite3AvgWeights");
vsiteOutOfPlaneAtoms = new OpenCLArray<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms"); vsiteOutOfPlaneAtoms = OpenCLArray::create<mm_int4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneAtoms");
vsiteOutOfPlaneWeights = new OpenCLArray<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights"); vsiteOutOfPlaneWeights = OpenCLArray::create<mm_float4>(context, max(1, numOutOfPlane), "vsiteOutOfPlaneWeights");
if (num2Avg > 0) { if (num2Avg > 0) {
vsite2AvgAtoms->upload(vsite2AvgAtomVec); vsite2AvgAtoms->upload(vsite2AvgAtomVec);
vsite2AvgWeights->upload(vsite2AvgWeightVec); vsite2AvgWeights->upload(vsite2AvgWeightVec);
...@@ -779,8 +779,8 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu ...@@ -779,8 +779,8 @@ void OpenCLIntegrationUtilities::initRandomNumberGenerator(unsigned int randomNu
// Create the random number arrays. // Create the random number arrays.
lastSeed = randomNumberSeed; lastSeed = randomNumberSeed;
random = new OpenCLArray<mm_float4>(context, 32*context.getPaddedNumAtoms(), "random"); random = OpenCLArray::create<mm_float4>(context, 32*context.getPaddedNumAtoms(), "random");
randomSeed = new OpenCLArray<mm_int4>(context, context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed"); randomSeed = OpenCLArray::create<mm_int4>(context, context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
randomPos = random->getSize(); randomPos = random->getSize();
// Use a quick and dirty RNG to pick seeds for the real random number generator. // Use a quick and dirty RNG to pick seeds for the real random number generator.
...@@ -809,7 +809,7 @@ int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) { ...@@ -809,7 +809,7 @@ int OpenCLIntegrationUtilities::prepareRandomNumbers(int numValues) {
} }
if (numValues > random->getSize()) { if (numValues > random->getSize()) {
delete random; delete random;
random = new OpenCLArray<mm_float4>(context, numValues, "random"); random = OpenCLArray::create<mm_float4>(context, numValues, "random");
} }
randomKernel.setArg<cl_int>(0, random->getSize()); randomKernel.setArg<cl_int>(0, random->getSize());
randomKernel.setArg<cl::Buffer>(1, random->getDeviceBuffer()); randomKernel.setArg<cl::Buffer>(1, random->getDeviceBuffer());
......
...@@ -46,20 +46,20 @@ public: ...@@ -46,20 +46,20 @@ public:
/** /**
* Get the array which contains position deltas. * Get the array which contains position deltas.
*/ */
OpenCLArray<mm_float4>& getPosDelta() { OpenCLArray& getPosDelta() {
return *posDelta; return *posDelta;
} }
/** /**
* Get the array which contains random values. Each element is a float4, whose components * Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1. * are independent, normally distributed random numbers with mean 0 and variance 1.
*/ */
OpenCLArray<mm_float4>& getRandom() { OpenCLArray& getRandom() {
return *random; return *random;
} }
/** /**
* Get the array which contains the current step size. * Get the array which contains the current step size.
*/ */
OpenCLArray<mm_float2>& getStepSize() { OpenCLArray& getStepSize() {
return *stepSize; return *stepSize;
} }
/** /**
...@@ -116,32 +116,32 @@ private: ...@@ -116,32 +116,32 @@ private:
cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel; cl::Kernel ccmaPosUpdateKernel, ccmaVelUpdateKernel;
cl::Kernel vsitePositionKernel, vsiteForceKernel; cl::Kernel vsitePositionKernel, vsiteForceKernel;
cl::Kernel randomKernel; cl::Kernel randomKernel;
OpenCLArray<mm_float4>* posDelta; OpenCLArray* posDelta;
OpenCLArray<mm_int4>* settleAtoms; OpenCLArray* settleAtoms;
OpenCLArray<mm_float2>* settleParams; OpenCLArray* settleParams;
OpenCLArray<mm_int4>* shakeAtoms; OpenCLArray* shakeAtoms;
OpenCLArray<mm_float4>* shakeParams; OpenCLArray* shakeParams;
OpenCLArray<mm_float4>* random; OpenCLArray* random;
OpenCLArray<mm_int4>* randomSeed; OpenCLArray* randomSeed;
OpenCLArray<mm_float2>* stepSize; OpenCLArray* stepSize;
OpenCLArray<mm_int2>* ccmaAtoms; OpenCLArray* ccmaAtoms;
OpenCLArray<mm_float4>* ccmaDistance; OpenCLArray* ccmaDistance;
OpenCLArray<cl_float>* ccmaReducedMass; OpenCLArray* ccmaReducedMass;
OpenCLArray<cl_int>* ccmaAtomConstraints; OpenCLArray* ccmaAtomConstraints;
OpenCLArray<cl_int>* ccmaNumAtomConstraints; OpenCLArray* ccmaNumAtomConstraints;
OpenCLArray<cl_int>* ccmaConstraintMatrixColumn; OpenCLArray* ccmaConstraintMatrixColumn;
OpenCLArray<cl_float>* ccmaConstraintMatrixValue; OpenCLArray* ccmaConstraintMatrixValue;
OpenCLArray<cl_float>* ccmaDelta1; OpenCLArray* ccmaDelta1;
OpenCLArray<cl_float>* ccmaDelta2; OpenCLArray* ccmaDelta2;
OpenCLArray<cl_int>* ccmaConverged; OpenCLArray* ccmaConverged;
cl::Buffer* ccmaConvergedBuffer; cl::Buffer* ccmaConvergedBuffer;
cl_int* ccmaConvergedMemory; cl_int* ccmaConvergedMemory;
OpenCLArray<mm_int4>* vsite2AvgAtoms; OpenCLArray* vsite2AvgAtoms;
OpenCLArray<mm_float2>* vsite2AvgWeights; OpenCLArray* vsite2AvgWeights;
OpenCLArray<mm_int4>* vsite3AvgAtoms; OpenCLArray* vsite3AvgAtoms;
OpenCLArray<mm_float4>* vsite3AvgWeights; OpenCLArray* vsite3AvgWeights;
OpenCLArray<mm_int4>* vsiteOutOfPlaneAtoms; OpenCLArray* vsiteOutOfPlaneAtoms;
OpenCLArray<mm_float4>* vsiteOutOfPlaneWeights; OpenCLArray* vsiteOutOfPlaneWeights;
int randomPos; int randomPos;
int lastSeed, numVsites; int lastSeed, numVsites;
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels; bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels;
......
...@@ -116,9 +116,10 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, ...@@ -116,9 +116,10 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
cl.getIntegrationUtilities().distributeForcesFromVirtualSites(); cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
double sum = 0.0f; double sum = 0.0f;
if (includeEnergy) { if (includeEnergy) {
OpenCLArray<cl_float>& energy = cl.getEnergyBuffer(); OpenCLArray& energyArray = cl.getEnergyBuffer();
energy.download(); cl_float* energy = (cl_float*) cl.getPinnedBuffer();
for (int i = 0; i < energy.getSize(); i++) energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i]; sum += energy[i];
} }
return sum; return sum;
...@@ -138,9 +139,9 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) { ...@@ -138,9 +139,9 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
} }
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) { void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
OpenCLArray<mm_float4>& posq = cl.getPosq(); mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
posq.download(); cl.getPosq().download(posq);
OpenCLArray<cl_int>& order = cl.getAtomIndex(); const vector<cl_int>& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles); positions.resize(numParticles);
mm_float4 periodicBoxSize = cl.getPeriodicBoxSize(); mm_float4 periodicBoxSize = cl.getPeriodicBoxSize();
...@@ -152,8 +153,9 @@ void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3 ...@@ -152,8 +153,9 @@ void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3
} }
void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) { void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) {
OpenCLArray<mm_float4>& posq = cl.getPosq(); mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
OpenCLArray<cl_int>& order = cl.getAtomIndex(); cl.getPosq().download(posq);
const vector<cl_int>& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
mm_float4& pos = posq[i]; mm_float4& pos = posq[i];
...@@ -164,15 +166,15 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vecto ...@@ -164,15 +166,15 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vecto
} }
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++) for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
posq[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f); posq[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f);
posq.upload(); cl.getPosq().upload(posq);
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++) for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0); cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
} }
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) { void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
OpenCLArray<mm_float4>& velm = cl.getVelm(); mm_float4* velm = (mm_float4*) cl.getPinnedBuffer();
velm.download(); cl.getVelm().download(velm);
OpenCLArray<cl_int>& order = cl.getAtomIndex(); const vector<cl_int>& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles); velocities.resize(numParticles);
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
...@@ -182,8 +184,9 @@ void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec ...@@ -182,8 +184,9 @@ void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec
} }
void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) { void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) {
OpenCLArray<mm_float4>& velm = cl.getVelm(); mm_float4* velm = (mm_float4*) cl.getPinnedBuffer();
OpenCLArray<cl_int>& order = cl.getAtomIndex(); cl.getVelm().download(velm);
const vector<cl_int>& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
mm_float4& vel = velm[i]; mm_float4& vel = velm[i];
...@@ -194,13 +197,13 @@ void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const vect ...@@ -194,13 +197,13 @@ void OpenCLUpdateStateDataKernel::setVelocities(ContextImpl& context, const vect
} }
for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++) for (int i = numParticles; i < cl.getPaddedNumAtoms(); i++)
velm[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f); velm[i] = mm_float4(0.0f, 0.0f, 0.0f, 0.0f);
velm.upload(); cl.getVelm().upload(velm);
} }
void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) { void OpenCLUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) {
OpenCLArray<mm_float4>& force = cl.getForce(); mm_float4* force = (mm_float4*) cl.getPinnedBuffer();
force.download(); cl.getForce().download(force);
OpenCLArray<cl_int>& order = cl.getAtomIndex(); const vector<cl_int>& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
forces.resize(numParticles); forces.resize(numParticles);
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
...@@ -231,11 +234,12 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream ...@@ -231,11 +234,12 @@ void OpenCLUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream
stream.write((char*) &stepCount, sizeof(int)); stream.write((char*) &stepCount, sizeof(int));
int computeForceCount = cl.getComputeForceCount(); int computeForceCount = cl.getComputeForceCount();
stream.write((char*) &computeForceCount, sizeof(int)); stream.write((char*) &computeForceCount, sizeof(int));
cl.getPosq().download(); char* buffer = (char*) cl.getPinnedBuffer();
stream.write((char*) &cl.getPosq()[0], sizeof(mm_float4)*cl.getPosq().getSize()); cl.getPosq().download((mm_float4*) buffer);
cl.getVelm().download(); stream.write(buffer, sizeof(mm_float4)*cl.getPosq().getSize());
stream.write((char*) &cl.getVelm()[0], sizeof(mm_float4)*cl.getVelm().getSize()); cl.getVelm().download((mm_float4*) buffer);
stream.write((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().getSize()); stream.write(buffer, sizeof(mm_float4)*cl.getVelm().getSize());
stream.write((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size());
stream.write((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size()); stream.write((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size());
mm_float4 box = cl.getPeriodicBoxSize(); mm_float4 box = cl.getPeriodicBoxSize();
stream.write((char*) &box, sizeof(mm_float4)); stream.write((char*) &box, sizeof(mm_float4));
...@@ -259,12 +263,13 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& ...@@ -259,12 +263,13 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream&
contexts[i]->setStepCount(stepCount); contexts[i]->setStepCount(stepCount);
contexts[i]->setComputeForceCount(computeForceCount); contexts[i]->setComputeForceCount(computeForceCount);
} }
stream.read((char*) &cl.getPosq()[0], sizeof(mm_float4)*cl.getPosq().getSize()); char* buffer = (char*) cl.getPinnedBuffer();
cl.getPosq().upload(); stream.read(buffer, sizeof(mm_float4)*cl.getPosq().getSize());
stream.read((char*) &cl.getVelm()[0], sizeof(mm_float4)*cl.getVelm().getSize()); cl.getPosq().upload(buffer);
cl.getVelm().upload(); stream.read(buffer, sizeof(mm_float4)*cl.getVelm().getSize());
stream.read((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().getSize()); cl.getVelm().upload(buffer);
cl.getAtomIndex().upload(); stream.read((char*) &cl.getAtomIndex()[0], sizeof(cl_int)*cl.getAtomIndex().size());
cl.getAtomIndexArray().upload(cl.getAtomIndex());
stream.read((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size()); stream.read((char*) &cl.getPosCellOffsets()[0], sizeof(mm_int4)*cl.getPosCellOffsets().size());
mm_float4 box; mm_float4 box;
stream.read((char*) &box, sizeof(mm_float4)); stream.read((char*) &box, sizeof(mm_float4));
...@@ -342,7 +347,7 @@ void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const H ...@@ -342,7 +347,7 @@ void OpenCLCalcHarmonicBondForceKernel::initialize(const System& system, const H
if (numBonds == 0) if (numBonds == 0)
return; return;
vector<vector<int> > atoms(numBonds, vector<int>(2)); vector<vector<int> > atoms(numBonds, vector<int>(2));
params = new OpenCLArray<mm_float2>(cl, numBonds, "bondParams"); params = OpenCLArray::create<mm_float2>(cl, numBonds, "bondParams");
vector<mm_float2> paramVector(numBonds); vector<mm_float2> paramVector(numBonds);
for (int i = 0; i < numBonds; i++) { for (int i = 0; i < numBonds; i++) {
double length, k; double length, k;
...@@ -463,7 +468,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus ...@@ -463,7 +468,7 @@ void OpenCLCalcCustomBondForceKernel::initialize(const System& system, const Cus
variables[name] = "bondParams"+params->getParameterSuffix(i); variables[name] = "bondParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customBondGlobals", false, CL_MEM_READ_ONLY); globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customBondGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -565,7 +570,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const ...@@ -565,7 +570,7 @@ void OpenCLCalcHarmonicAngleForceKernel::initialize(const System& system, const
if (numAngles == 0) if (numAngles == 0)
return; return;
vector<vector<int> > atoms(numAngles, vector<int>(3)); vector<vector<int> > atoms(numAngles, vector<int>(3));
params = new OpenCLArray<mm_float2>(cl, numAngles, "angleParams"); params = OpenCLArray::create<mm_float2>(cl, numAngles, "angleParams");
vector<mm_float2> paramVector(numAngles); vector<mm_float2> paramVector(numAngles);
for (int i = 0; i < numAngles; i++) { for (int i = 0; i < numAngles; i++) {
double angle, k; double angle, k;
...@@ -688,7 +693,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu ...@@ -688,7 +693,7 @@ void OpenCLCalcCustomAngleForceKernel::initialize(const System& system, const Cu
variables[name] = "angleParams"+params->getParameterSuffix(i); variables[name] = "angleParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customAngleGlobals", false, CL_MEM_READ_ONLY); globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customAngleGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -791,7 +796,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons ...@@ -791,7 +796,7 @@ void OpenCLCalcPeriodicTorsionForceKernel::initialize(const System& system, cons
if (numTorsions == 0) if (numTorsions == 0)
return; return;
vector<vector<int> > atoms(numTorsions, vector<int>(4)); vector<vector<int> > atoms(numTorsions, vector<int>(4));
params = new OpenCLArray<mm_float4>(cl, numTorsions, "periodicTorsionParams"); params = OpenCLArray::create<mm_float4>(cl, numTorsions, "periodicTorsionParams");
vector<mm_float4> paramVector(numTorsions); vector<mm_float4> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
int periodicity; int periodicity;
...@@ -875,7 +880,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo ...@@ -875,7 +880,7 @@ void OpenCLCalcRBTorsionForceKernel::initialize(const System& system, const RBTo
if (numTorsions == 0) if (numTorsions == 0)
return; return;
vector<vector<int> > atoms(numTorsions, vector<int>(4)); vector<vector<int> > atoms(numTorsions, vector<int>(4));
params = new OpenCLArray<mm_float8>(cl, numTorsions, "rbTorsionParams"); params = OpenCLArray::create<mm_float8>(cl, numTorsions, "rbTorsionParams");
vector<mm_float8> paramVector(numTorsions); vector<mm_float8> paramVector(numTorsions);
for (int i = 0; i < numTorsions; i++) { for (int i = 0; i < numTorsions; i++) {
double c0, c1, c2, c3, c4, c5; double c0, c1, c2, c3, c4, c5;
...@@ -987,9 +992,9 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM ...@@ -987,9 +992,9 @@ void OpenCLCalcCMAPTorsionForceKernel::initialize(const System& system, const CM
vector<cl_int> torsionMapsVec(numTorsions); vector<cl_int> torsionMapsVec(numTorsions);
for (int i = 0; i < numTorsions; i++) for (int i = 0; i < numTorsions; i++)
force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]); force.getTorsionParameters(startIndex+i, torsionMapsVec[i], atoms[i][0], atoms[i][1], atoms[i][2], atoms[i][3], atoms[i][4], atoms[i][5], atoms[i][6], atoms[i][7]);
coefficients = new OpenCLArray<mm_float4>(cl, coeffVec.size(), "cmapTorsionCoefficients"); coefficients = OpenCLArray::create<mm_float4>(cl, coeffVec.size(), "cmapTorsionCoefficients");
mapPositions = new OpenCLArray<mm_int2>(cl, numMaps, "cmapTorsionMapPositions"); mapPositions = OpenCLArray::create<mm_int2>(cl, numMaps, "cmapTorsionMapPositions");
torsionMaps = new OpenCLArray<cl_int>(cl, numTorsions, "cmapTorsionMaps"); torsionMaps = OpenCLArray::create<cl_int>(cl, numTorsions, "cmapTorsionMaps");
coefficients->upload(coeffVec); coefficients->upload(coeffVec);
mapPositions->upload(mapPositionsVec); mapPositions->upload(mapPositionsVec);
torsionMaps->upload(torsionMapsVec); torsionMaps->upload(torsionMapsVec);
...@@ -1086,7 +1091,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const ...@@ -1086,7 +1091,7 @@ void OpenCLCalcCustomTorsionForceKernel::initialize(const System& system, const
variables[name] = "torsionParams"+params->getParameterSuffix(i); variables[name] = "torsionParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customTorsionGlobals", false, CL_MEM_READ_ONLY); globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customTorsionGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -1229,8 +1234,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1229,8 +1234,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Initialize nonbonded interactions. // Initialize nonbonded interactions.
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
sigmaEpsilon = new OpenCLArray<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon"); sigmaEpsilon = OpenCLArray::create<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
OpenCLArray<mm_float4>& posq = cl.getPosq(); vector<mm_float4> posq(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms()); vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms());
vector<vector<int> > exclusionList(numParticles); vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
...@@ -1252,7 +1257,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1252,7 +1257,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
exclusionList[exclusions[i].first].push_back(exclusions[i].second); exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first); exclusionList[exclusions[i].second].push_back(exclusions[i].first);
} }
posq.upload(); cl.getPosq().upload(posq);
sigmaEpsilon->upload(sigmaEpsilonVector); sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic); bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
...@@ -1293,7 +1298,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1293,7 +1298,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements); cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums"); ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces"); ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
cosSinSums = new OpenCLArray<mm_float2>(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), "cosSinSums"); cosSinSums = OpenCLArray::create<mm_float2>(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), "cosSinSums");
} }
else if (force.getNonbondedMethod() == NonbondedForce::PME) { else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters. // Compute the PME parameters.
...@@ -1317,18 +1322,18 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1317,18 +1322,18 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Create required data structures. // Create required data structures.
pmeGrid = new OpenCLArray<mm_float2>(cl, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid"); pmeGrid = OpenCLArray::create<mm_float2>(cl, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid");
cl.addAutoclearBuffer(pmeGrid->getDeviceBuffer(), pmeGrid->getSize()*2); cl.addAutoclearBuffer(pmeGrid->getDeviceBuffer(), pmeGrid->getSize()*2);
pmeGrid2 = new OpenCLArray<mm_float2>(cl, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid2"); pmeGrid2 = OpenCLArray::create<mm_float2>(cl, gridSizeX*gridSizeY*gridSizeZ, "pmeGrid2");
pmeBsplineModuliX = new OpenCLArray<cl_float>(cl, gridSizeX, "pmeBsplineModuliX"); pmeBsplineModuliX = OpenCLArray::create<cl_float>(cl, gridSizeX, "pmeBsplineModuliX");
pmeBsplineModuliY = new OpenCLArray<cl_float>(cl, gridSizeY, "pmeBsplineModuliY"); pmeBsplineModuliY = OpenCLArray::create<cl_float>(cl, gridSizeY, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray<cl_float>(cl, gridSizeZ, "pmeBsplineModuliZ"); pmeBsplineModuliZ = OpenCLArray::create<cl_float>(cl, gridSizeZ, "pmeBsplineModuliZ");
pmeBsplineTheta = new OpenCLArray<mm_float4>(cl, PmeOrder*numParticles, "pmeBsplineTheta"); pmeBsplineTheta = OpenCLArray::create<mm_float4>(cl, PmeOrder*numParticles, "pmeBsplineTheta");
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
if (deviceIsCpu) if (deviceIsCpu)
pmeBsplineDTheta = new OpenCLArray<mm_float4>(cl, PmeOrder*numParticles, "pmeBsplineDTheta"); pmeBsplineDTheta = OpenCLArray::create<mm_float4>(cl, PmeOrder*numParticles, "pmeBsplineDTheta");
pmeAtomRange = new OpenCLArray<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange"); pmeAtomRange = OpenCLArray::create<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = new OpenCLArray<mm_int2>(cl, numParticles, "pmeAtomGridIndex"); pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
sort = new OpenCLSort<SortTrait>(cl, cl.getNumAtoms()); sort = new OpenCLSort<SortTrait>(cl, cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ); fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ);
...@@ -1411,7 +1416,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1411,7 +1416,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if (numExceptions > 0) { if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions); exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2)); vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = new OpenCLArray<mm_float4>(cl, numExceptions, "exceptionParams"); exceptionParams = OpenCLArray::create<mm_float4>(cl, numExceptions, "exceptionParams");
vector<mm_float4> exceptionParamsVector(numExceptions); vector<mm_float4> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) { for (int i = 0; i < numExceptions; i++) {
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
...@@ -1577,20 +1582,21 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex ...@@ -1577,20 +1582,21 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Record the per-particle parameters. // Record the per-particle parameters.
OpenCLArray<mm_float4>& posq = cl.getPosq(); OpenCLArray& posq = cl.getPosq();
posq.download(); posq.download((mm_float4*) cl.getPinnedBuffer());
mm_float4* posqf = (mm_float4*) cl.getPinnedBuffer();
vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms()); vector<mm_float2> sigmaEpsilonVector(cl.getPaddedNumAtoms());
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
OpenCLArray<cl_int>& order = cl.getAtomIndex(); const vector<cl_int>& order = cl.getAtomIndex();
for (int i = 0; i < force.getNumParticles(); i++) { for (int i = 0; i < force.getNumParticles(); i++) {
int index = order[i]; int index = order[i];
double charge, sigma, epsilon; double charge, sigma, epsilon;
force.getParticleParameters(index, charge, sigma, epsilon); force.getParticleParameters(index, charge, sigma, epsilon);
posq[i].w = (float) charge; posqf[i].w = (float) charge;
sigmaEpsilonVector[index] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon))); sigmaEpsilonVector[index] = mm_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
posq.upload(); posq.upload(cl.getPinnedBuffer());
sigmaEpsilon->upload(sigmaEpsilonVector); sigmaEpsilon->upload(sigmaEpsilonVector);
// Record the exceptions. // Record the exceptions.
...@@ -1669,7 +1675,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -1669,7 +1675,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters"); params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customNonbondedParameters");
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", false, CL_MEM_READ_ONLY); globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customNonbondedGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > paramVector(numParticles); vector<vector<cl_float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles); vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -1704,12 +1710,12 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -1704,12 +1710,12 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
functions[name] = &fp; functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2); tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max); vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction")); tabulatedFunctions.push_back(OpenCLArray::create<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
} }
if (force.getNumFunctions() > 0) { if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY); tabulatedFunctionParams = OpenCLArray::create<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec); tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer()));
} }
...@@ -1838,25 +1844,25 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB ...@@ -1838,25 +1844,25 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
if (cl.getPlatformData().contexts.size() > 1) if (cl.getPlatformData().contexts.size() > 1)
throw OpenMMException("GBSAOBCForce does not support using multiple OpenCL devices"); throw OpenMMException("GBSAOBCForce does not support using multiple OpenCL devices");
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities(); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
params = new OpenCLArray<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams"); params = OpenCLArray::create<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
bornRadii = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "bornRadii"); bornRadii = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms(), "bornRadii");
obcChain = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "obcChain"); obcChain = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms(), "obcChain");
if (cl.getSupports64BitGlobalAtomics()) { if (cl.getSupports64BitGlobalAtomics()) {
longBornSum = new OpenCLArray<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum"); longBornSum = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornSum");
longBornForce = new OpenCLArray<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce"); longBornForce = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "longBornForce");
bornForce = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "bornForce"); bornForce = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms(), "bornForce");
cl.addAutoclearBuffer(longBornSum->getDeviceBuffer(), 2*longBornSum->getSize()); cl.addAutoclearBuffer(longBornSum->getDeviceBuffer(), 2*longBornSum->getSize());
cl.addAutoclearBuffer(longBornForce->getDeviceBuffer(), 2*longBornForce->getSize()); cl.addAutoclearBuffer(longBornForce->getDeviceBuffer(), 2*longBornForce->getSize());
} }
else { else {
bornSum = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum"); bornSum = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
bornForce = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce"); bornForce = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
cl.addAutoclearBuffer(bornSum->getDeviceBuffer(), bornSum->getSize()); cl.addAutoclearBuffer(bornSum->getDeviceBuffer(), bornSum->getSize());
cl.addAutoclearBuffer(bornForce->getDeviceBuffer(), bornForce->getSize()); cl.addAutoclearBuffer(bornForce->getDeviceBuffer(), bornForce->getSize());
} }
OpenCLArray<mm_float4>& posq = cl.getPosq(); vector<mm_float4> posq(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
vector<mm_float2> paramsVector(numParticles); vector<mm_float2> paramsVector(cl.getPaddedNumAtoms());
const double dielectricOffset = 0.009; const double dielectricOffset = 0.009;
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
double charge, radius, scalingFactor; double charge, radius, scalingFactor;
...@@ -1865,7 +1871,7 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB ...@@ -1865,7 +1871,7 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius)); paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
posq[i].w = (float) charge; posq[i].w = (float) charge;
} }
posq.upload(); cl.getPosq().upload(posq);
params->upload(paramsVector); params->upload(paramsVector);
prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric())); prefactor = -ONE_4PI_EPS0*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
...@@ -2006,18 +2012,19 @@ void OpenCLCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -2006,18 +2012,19 @@ void OpenCLCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context,
// Record the per-particle parameters. // Record the per-particle parameters.
OpenCLArray<mm_float4>& posq = cl.getPosq(); OpenCLArray& posq = cl.getPosq();
posq.download(); posq.download((mm_float4*) cl.getPinnedBuffer());
vector<mm_float2> paramsVector(numParticles); mm_float4* posqf = (mm_float4*) cl.getPinnedBuffer();
vector<mm_float2> paramsVector(cl.getPaddedNumAtoms());
const double dielectricOffset = 0.009; const double dielectricOffset = 0.009;
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
double charge, radius, scalingFactor; double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor); force.getParticleParameters(i, charge, radius, scalingFactor);
radius -= dielectricOffset; radius -= dielectricOffset;
paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius)); paramsVector[i] = mm_float2((float) radius, (float) (scalingFactor*radius));
posq[i].w = (float) charge; posqf[i].w = (float) charge;
} }
posq.upload(); posq.upload(cl.getPinnedBuffer());
params->upload(paramsVector); params->upload(paramsVector);
// Mark that the current reordering may be invalid. // Mark that the current reordering may be invalid.
...@@ -2107,7 +2114,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2107,7 +2114,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customGBParameters", true); params = new OpenCLParameterSet(cl, force.getNumPerParticleParameters(), numParticles, "customGBParameters", true);
computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), numParticles, "customGBComputedValues", true); computedValues = new OpenCLParameterSet(cl, force.getNumComputedValues(), numParticles, "customGBComputedValues", true);
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", false, CL_MEM_READ_ONLY); globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customGBGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > paramVector(numParticles); vector<vector<cl_float> > paramVector(numParticles);
vector<vector<int> > exclusionList(numParticles); vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
...@@ -2143,13 +2150,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2143,13 +2150,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
functions[name] = &fp; functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2); tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max); vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction")); tabulatedFunctions.push_back(OpenCLArray::create<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float", 4, sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
tableArgs << ", __global const float4* restrict " << arrayName; tableArgs << ", __global const float4* restrict " << arrayName;
} }
if (force.getNumFunctions() > 0) { if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY); tabulatedFunctionParams = OpenCLArray::create<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec); tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float", 4, sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer()));
tableArgs << ", __global const float4* " << prefix << "functionParams"; tableArgs << ", __global const float4* " << prefix << "functionParams";
...@@ -2207,7 +2214,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2207,7 +2214,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
bool useLong = (cl.getSupports64BitGlobalAtomics() && !deviceIsCpu); bool useLong = (cl.getSupports64BitGlobalAtomics() && !deviceIsCpu);
if (useLong) { if (useLong) {
longEnergyDerivs = new OpenCLArray<cl_long>(cl, force.getNumComputedValues()*cl.getPaddedNumAtoms(), "customGBLongEnergyDerivatives"); longEnergyDerivs = OpenCLArray::create<cl_long>(cl, force.getNumComputedValues()*cl.getPaddedNumAtoms(), "customGBLongEnergyDerivatives");
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivatives", true); energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms(), "customGBEnergyDerivatives", true);
} }
else else
...@@ -2748,12 +2755,12 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -2748,12 +2755,12 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0); maxTiles = (nb.getUseCutoff() ? nb.getInteractingTiles().getSize() : 0);
bool useLong = (cl.getSupports64BitGlobalAtomics() && !deviceIsCpu); bool useLong = (cl.getSupports64BitGlobalAtomics() && !deviceIsCpu);
if (useLong) { if (useLong) {
longValueBuffers = new OpenCLArray<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers"); longValueBuffers = OpenCLArray::create<cl_long>(cl, cl.getPaddedNumAtoms(), "customGBLongValueBuffers");
cl.addAutoclearBuffer(longValueBuffers->getDeviceBuffer(), 2*longValueBuffers->getSize()); cl.addAutoclearBuffer(longValueBuffers->getDeviceBuffer(), 2*longValueBuffers->getSize());
cl.clearBuffer(longValueBuffers->getDeviceBuffer(), 2*longValueBuffers->getSize()); cl.clearBuffer(longValueBuffers->getDeviceBuffer(), 2*longValueBuffers->getSize());
} }
else { else {
valueBuffers = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "customGBValueBuffers"); valueBuffers = OpenCLArray::create<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "customGBValueBuffers");
cl.addAutoclearBuffer(valueBuffers->getDeviceBuffer(), valueBuffers->getSize()); cl.addAutoclearBuffer(valueBuffers->getDeviceBuffer(), valueBuffers->getSize());
cl.clearBuffer(*valueBuffers); cl.clearBuffer(*valueBuffers);
} }
...@@ -3045,7 +3052,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const ...@@ -3045,7 +3052,7 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
variables[name] = "particleParams"+params->getParameterSuffix(i); variables[name] = "particleParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customExternalGlobals", false, CL_MEM_READ_ONLY); globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customExternalGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -3232,12 +3239,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -3232,12 +3239,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
if (numDonors == 0 || numAcceptors == 0) if (numDonors == 0 || numAcceptors == 0)
return; return;
int numParticles = system.getNumParticles(); int numParticles = system.getNumParticles();
donors = new OpenCLArray<mm_int4>(cl, numDonors, "customHbondDonors"); donors = OpenCLArray::create<mm_int4>(cl, numDonors, "customHbondDonors");
acceptors = new OpenCLArray<mm_int4>(cl, numAcceptors, "customHbondAcceptors"); acceptors = OpenCLArray::create<mm_int4>(cl, numAcceptors, "customHbondAcceptors");
donorParams = new OpenCLParameterSet(cl, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters"); donorParams = new OpenCLParameterSet(cl, force.getNumPerDonorParameters(), numDonors, "customHbondDonorParameters");
acceptorParams = new OpenCLParameterSet(cl, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters"); acceptorParams = new OpenCLParameterSet(cl, force.getNumPerAcceptorParameters(), numAcceptors, "customHbondAcceptorParameters");
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customHbondGlobals", false, CL_MEM_READ_ONLY); globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customHbondGlobals", CL_MEM_READ_ONLY);
vector<vector<cl_float> > donorParamVector(numDonors); vector<vector<cl_float> > donorParamVector(numDonors);
vector<mm_int4> donorVector(numDonors); vector<mm_int4> donorVector(numDonors);
for (int i = 0; i < numDonors; i++) { for (int i = 0; i < numDonors; i++) {
...@@ -3263,8 +3270,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -3263,8 +3270,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
// Select an output buffer index for each donor and acceptor. // Select an output buffer index for each donor and acceptor.
donorBufferIndices = new OpenCLArray<mm_int4>(cl, numDonors, "customHbondDonorBuffers"); donorBufferIndices = OpenCLArray::create<mm_int4>(cl, numDonors, "customHbondDonorBuffers");
acceptorBufferIndices = new OpenCLArray<mm_int4>(cl, numAcceptors, "customHbondAcceptorBuffers"); acceptorBufferIndices = OpenCLArray::create<mm_int4>(cl, numAcceptors, "customHbondAcceptorBuffers");
vector<mm_int4> donorBufferVector(numDonors); vector<mm_int4> donorBufferVector(numDonors);
vector<mm_int4> acceptorBufferVector(numAcceptors); vector<mm_int4> acceptorBufferVector(numAcceptors);
vector<int> donorBufferCounter(numParticles, 0); vector<int> donorBufferCounter(numParticles, 0);
...@@ -3317,8 +3324,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -3317,8 +3324,8 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
else else
throw OpenMMException("CustomHbondForce: OpenCLPlatform does not support more than four exclusions per acceptor"); throw OpenMMException("CustomHbondForce: OpenCLPlatform does not support more than four exclusions per acceptor");
} }
donorExclusions = new OpenCLArray<mm_int4>(cl, numDonors, "customHbondDonorExclusions"); donorExclusions = OpenCLArray::create<mm_int4>(cl, numDonors, "customHbondDonorExclusions");
acceptorExclusions = new OpenCLArray<mm_int4>(cl, numAcceptors, "customHbondAcceptorExclusions"); acceptorExclusions = OpenCLArray::create<mm_int4>(cl, numAcceptors, "customHbondAcceptorExclusions");
donorExclusions->upload(donorExclusionVector); donorExclusions->upload(donorExclusionVector);
acceptorExclusions->upload(acceptorExclusionVector); acceptorExclusions->upload(acceptorExclusionVector);
...@@ -3339,12 +3346,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu ...@@ -3339,12 +3346,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
functions[name] = &fp; functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2); tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max); vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction")); tabulatedFunctions.push_back(OpenCLArray::create<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f); tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
tableArgs << ", __global const float4* restrict " << arrayName; tableArgs << ", __global const float4* restrict " << arrayName;
} }
if (force.getNumFunctions() > 0) { if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY); tabulatedFunctionParams = OpenCLArray::create<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec); tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
tableArgs << ", __global const float4* restrict functionParams"; tableArgs << ", __global const float4* restrict functionParams";
} }
...@@ -3728,7 +3735,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c ...@@ -3728,7 +3735,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
functions[name] = &fp; functions[name] = &fp;
tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2); tabulatedFunctionParamsVec[i] = mm_float4((float) min, (float) max, (float) ((values.size()-1)/(max-min)), (float) values.size()-2);
vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max); vector<mm_float4> f = OpenCLExpressionUtilities::computeFunctionCoefficients(values, min, max);
OpenCLArray<mm_float4>* array = new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction"); OpenCLArray* array = OpenCLArray::create<mm_float4>(cl, values.size()-1, "TabulatedFunction");
tabulatedFunctions.push_back(array); tabulatedFunctions.push_back(array);
array->upload(f); array->upload(f);
string arrayName = cl.getBondedUtilities().addArgument(array->getDeviceBuffer(), "float4"); string arrayName = cl.getBondedUtilities().addArgument(array->getDeviceBuffer(), "float4");
...@@ -3736,7 +3743,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c ...@@ -3736,7 +3743,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
} }
string functionParamsName; string functionParamsName;
if (force.getNumFunctions() > 0) { if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY); tabulatedFunctionParams = OpenCLArray::create<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec); tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
functionParamsName = cl.getBondedUtilities().addArgument(tabulatedFunctionParams->getDeviceBuffer(), "float4"); functionParamsName = cl.getBondedUtilities().addArgument(tabulatedFunctionParams->getDeviceBuffer(), "float4");
} }
...@@ -3761,7 +3768,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c ...@@ -3761,7 +3768,7 @@ void OpenCLCalcCustomCompoundBondForceKernel::initialize(const System& system, c
variables[name] = "bondParams"+params->getParameterSuffix(i); variables[name] = "bondParams"+params->getParameterSuffix(i);
} }
if (force.getNumGlobalParameters() > 0) { if (force.getNumGlobalParameters() > 0) {
globals = new OpenCLArray<cl_float>(cl, force.getNumGlobalParameters(), "customCompoundBondGlobals", false, CL_MEM_READ_ONLY); globals = OpenCLArray::create<cl_float>(cl, force.getNumGlobalParameters(), "customCompoundBondGlobals", CL_MEM_READ_ONLY);
globals->upload(globalParamValues); globals->upload(globalParamValues);
string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float"); string argName = cl.getBondedUtilities().addArgument(globals->getDeviceBuffer(), "float");
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -4042,7 +4049,7 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L ...@@ -4042,7 +4049,7 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines, ""); cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines, "");
kernel1 = cl::Kernel(program, "integrateLangevinPart1"); kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2"); kernel2 = cl::Kernel(program, "integrateLangevinPart2");
params = new OpenCLArray<cl_float>(cl, 3, "langevinParams"); params = OpenCLArray::create<cl_float>(cl, 3, "langevinParams");
prevStepSize = -1.0; prevStepSize = -1.0;
} }
...@@ -4078,8 +4085,8 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -4078,8 +4085,8 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
p[1] = (cl_float) fscale; p[1] = (cl_float) fscale;
p[2] = (cl_float) noisescale; p[2] = (cl_float) noisescale;
params->upload(p); params->upload(p);
integration.getStepSize()[0].y = (cl_float) stepSize; mm_float2 ss = mm_float2(0, (float) stepSize);
integration.getStepSize().upload(); integration.getStepSize().upload(&ss);
prevTemp = temperature; prevTemp = temperature;
prevFriction = friction; prevFriction = friction;
prevStepSize = stepSize; prevStepSize = stepSize;
...@@ -4222,8 +4229,9 @@ double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, co ...@@ -4222,8 +4229,9 @@ double OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, co
// Update the time and step count. // Update the time and step count.
cl.getIntegrationUtilities().getStepSize().download(); mm_float2 stepSize;
double dt = cl.getIntegrationUtilities().getStepSize()[0].y; cl.getIntegrationUtilities().getStepSize().download(&stepSize);
double dt = stepSize.y;
double time = cl.getTime()+dt; double time = cl.getTime()+dt;
if (dt == maxStepSize) if (dt == maxStepSize)
time = maxTime; // Avoid round-off error time = maxTime; // Avoid round-off error
...@@ -4247,7 +4255,7 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, ...@@ -4247,7 +4255,7 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
kernel1 = cl::Kernel(program, "integrateLangevinPart1"); kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2"); kernel2 = cl::Kernel(program, "integrateLangevinPart2");
selectSizeKernel = cl::Kernel(program, "selectLangevinStepSize"); selectSizeKernel = cl::Kernel(program, "selectLangevinStepSize");
params = new OpenCLArray<cl_float>(cl, 3, "langevinParams"); params = OpenCLArray::create<cl_float>(cl, 3, "langevinParams");
blockSize = min(256, system.getNumParticles()); blockSize = min(256, system.getNumParticles());
blockSize = max(blockSize, params->getSize()); blockSize = max(blockSize, params->getSize());
blockSize = min(blockSize, (int) cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>()); blockSize = min(blockSize, (int) cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
...@@ -4301,8 +4309,9 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, ...@@ -4301,8 +4309,9 @@ double OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context,
// Update the time and step count. // Update the time and step count.
cl.getIntegrationUtilities().getStepSize().download(); mm_float2 stepSize;
double dt = cl.getIntegrationUtilities().getStepSize()[0].y; cl.getIntegrationUtilities().getStepSize().download(&stepSize);
double dt = stepSize.y;
double time = cl.getTime()+dt; double time = cl.getTime()+dt;
if (dt == maxStepSize) if (dt == maxStepSize)
time = maxTime; // Avoid round-off error time = maxTime; // Avoid round-off error
...@@ -4334,7 +4343,7 @@ public: ...@@ -4334,7 +4343,7 @@ public:
swap[3*lastAtomOrder[i]+1] = localPerDofValues[3*i+1]; swap[3*lastAtomOrder[i]+1] = localPerDofValues[3*i+1];
swap[3*lastAtomOrder[i]+2] = localPerDofValues[3*i+2]; swap[3*lastAtomOrder[i]+2] = localPerDofValues[3*i+2];
} }
OpenCLArray<cl_int>& order = cl.getAtomIndex(); const vector<cl_int>& order = cl.getAtomIndex();
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
localPerDofValues[3*i] = swap[3*order[i]]; localPerDofValues[3*i] = swap[3*order[i]];
localPerDofValues[3*i+1] = swap[3*order[i]+1]; localPerDofValues[3*i+1] = swap[3*order[i]+1];
...@@ -4374,9 +4383,9 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus ...@@ -4374,9 +4383,9 @@ void OpenCLIntegrateCustomStepKernel::initialize(const System& system, const Cus
cl.getPlatformData().initializeContexts(system); cl.getPlatformData().initializeContexts(system);
cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed()); cl.getIntegrationUtilities().initRandomNumberGenerator(integrator.getRandomNumberSeed());
numGlobalVariables = integrator.getNumGlobalVariables(); numGlobalVariables = integrator.getNumGlobalVariables();
globalValues = new OpenCLArray<cl_float>(cl, max(1, numGlobalVariables), "globalVariables", true); globalValues = OpenCLArray::create<cl_float>(cl, max(1, numGlobalVariables), "globalVariables");
sumBuffer = new OpenCLArray<cl_float>(cl, 3*system.getNumParticles(), "sumBuffer"); sumBuffer = OpenCLArray::create<cl_float>(cl, 3*system.getNumParticles(), "sumBuffer");
energy = new OpenCLArray<cl_float>(cl, 1, "energy"); energy = OpenCLArray::create<cl_float>(cl, 1, "energy");
perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables"); perDofValues = new OpenCLParameterSet(cl, integrator.getNumPerDofVariables(), 3*system.getNumParticles(), "perDofVariables");
cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValues, deviceValuesAreCurrent)); cl.addReorderListener(new ReorderListener(cl, *perDofValues, localPerDofValues, deviceValuesAreCurrent));
prevStepSize = -1.0; prevStepSize = -1.0;
...@@ -4459,12 +4468,13 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -4459,12 +4468,13 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
// Initialize various data structures. // Initialize various data structures.
const map<string, double>& params = context.getParameters(); const map<string, double>& params = context.getParameters();
contextParameterValues = new OpenCLArray<cl_float>(cl, max(1, (int) params.size()), "contextParameters", true); contextParameterValues = OpenCLArray::create<cl_float>(cl, max(1, (int) params.size()), "contextParameters");
contextValues.resize(contextParameterValues->getSize());
for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) { for (map<string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter) {
contextParameterValues->set(parameterNames.size(), (float) iter->second); contextValues[parameterNames.size()] = (float) iter->second;
parameterNames.push_back(iter->first); parameterNames.push_back(iter->first);
} }
contextParameterValues->upload(); contextParameterValues->upload(contextValues);
kernels.resize(integrator.getNumComputations()); kernels.resize(integrator.getNumComputations());
requiredGaussian.resize(integrator.getNumComputations(), 0); requiredGaussian.resize(integrator.getNumComputations(), 0);
requiredUniform.resize(integrator.getNumComputations(), 0); requiredUniform.resize(integrator.getNumComputations(), 0);
...@@ -4480,8 +4490,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -4480,8 +4490,8 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
// Initialize the random number generator. // Initialize the random number generator.
uniformRandoms = new OpenCLArray<mm_float4>(cl, cl.getNumAtoms(), "uniformRandoms"); uniformRandoms = OpenCLArray::create<mm_float4>(cl, cl.getNumAtoms(), "uniformRandoms");
randomSeed = new OpenCLArray<mm_int4>(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed"); randomSeed = OpenCLArray::create<mm_int4>(cl, cl.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize, "randomSeed");
vector<mm_int4> seed(randomSeed->getSize()); vector<mm_int4> seed(randomSeed->getSize());
unsigned int r = integrator.getRandomNumberSeed()+1; unsigned int r = integrator.getRandomNumberSeed()+1;
for (int i = 0; i < randomSeed->getSize(); i++) { for (int i = 0; i < randomSeed->getSize(); i++) {
...@@ -4744,20 +4754,20 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -4744,20 +4754,20 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
localValuesAreCurrent = false; localValuesAreCurrent = false;
double stepSize = integrator.getStepSize(); double stepSize = integrator.getStepSize();
if (stepSize != prevStepSize) { if (stepSize != prevStepSize) {
integration.getStepSize()[0].y = (cl_float) stepSize; mm_float2 ss = mm_float2(0, (float) stepSize);
integration.getStepSize().upload(); integration.getStepSize().upload(&ss);
prevStepSize = stepSize; prevStepSize = stepSize;
} }
bool paramsChanged = false; bool paramsChanged = false;
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
float value = (float) context.getParameter(parameterNames[i]); float value = (float) context.getParameter(parameterNames[i]);
if (value != contextParameterValues->get(i)) { if (value != contextValues[i]) {
contextParameterValues->set(i, value); contextValues[i] = value;
paramsChanged = true; paramsChanged = true;
} }
} }
if (paramsChanged) if (paramsChanged)
contextParameterValues->upload(); contextParameterValues->upload(contextValues);
// Loop over computation steps in the integrator and execute them. // Loop over computation steps in the integrator and execute them.
...@@ -4829,25 +4839,33 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr ...@@ -4829,25 +4839,33 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
void OpenCLIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) { void OpenCLIntegrateCustomStepKernel::recordChangedParameters(ContextImpl& context) {
if (!modifiesParameters) if (!modifiesParameters)
return; return;
contextParameterValues->download(); contextParameterValues->download(contextValues);
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < (int) parameterNames.size(); i++) {
float value = (float) context.getParameter(parameterNames[i]); float value = (float) context.getParameter(parameterNames[i]);
if (value != contextParameterValues->get(i)) if (value != contextValues[i])
context.setParameter(parameterNames[i], contextParameterValues->get(i)); context.setParameter(parameterNames[i], contextValues[i]);
} }
} }
void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const { void OpenCLIntegrateCustomStepKernel::getGlobalVariables(ContextImpl& context, vector<double>& values) const {
globalValues->download(); if (numGlobalVariables == 0) {
values.resize(0);
return;
}
vector<cl_float> buffer;
globalValues->download(buffer);
values.resize(numGlobalVariables); values.resize(numGlobalVariables);
for (int i = 0; i < numGlobalVariables; i++) for (int i = 0; i < numGlobalVariables; i++)
values[i] = globalValues->get(i); values[i] = buffer[i];
} }
void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) { void OpenCLIntegrateCustomStepKernel::setGlobalVariables(ContextImpl& context, const vector<double>& values) {
if (numGlobalVariables == 0)
return;
vector<cl_float> valuesVec(numGlobalVariables);
for (int i = 0; i < numGlobalVariables; i++) for (int i = 0; i < numGlobalVariables; i++)
globalValues->set(i, (float) values[i]); valuesVec[i] = (float) values[i];
globalValues->upload(); globalValues->upload(valuesVec);
} }
void OpenCLIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const { void OpenCLIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, int variable, vector<Vec3>& values) const {
...@@ -4856,7 +4874,7 @@ void OpenCLIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, in ...@@ -4856,7 +4874,7 @@ void OpenCLIntegrateCustomStepKernel::getPerDofVariable(ContextImpl& context, in
localValuesAreCurrent = true; localValuesAreCurrent = true;
} }
values.resize(perDofValues->getNumObjects()/3); values.resize(perDofValues->getNumObjects()/3);
OpenCLArray<cl_int>& order = cl.getAtomIndex(); const vector<cl_int>& order = cl.getAtomIndex();
for (int i = 0; i < (int) values.size(); i++) for (int i = 0; i < (int) values.size(); i++)
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
values[order[i]][j] = localPerDofValues[3*i+j][variable]; values[order[i]][j] = localPerDofValues[3*i+j][variable];
...@@ -4867,7 +4885,7 @@ void OpenCLIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, in ...@@ -4867,7 +4885,7 @@ void OpenCLIntegrateCustomStepKernel::setPerDofVariable(ContextImpl& context, in
perDofValues->getParameterValues(localPerDofValues); perDofValues->getParameterValues(localPerDofValues);
localValuesAreCurrent = true; localValuesAreCurrent = true;
} }
OpenCLArray<cl_int>& order = cl.getAtomIndex(); const vector<cl_int>& order = cl.getAtomIndex();
for (int i = 0; i < (int) values.size(); i++) for (int i = 0; i < (int) values.size(); i++)
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
localPerDofValues[3*i+j][variable] = (float) values[order[i]][j]; localPerDofValues[3*i+j][variable] = (float) values[order[i]][j];
...@@ -4890,7 +4908,7 @@ void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const ...@@ -4890,7 +4908,7 @@ void OpenCLApplyAndersenThermostatKernel::initialize(const System& system, const
// Create the arrays with the group definitions. // Create the arrays with the group definitions.
vector<vector<int> > groups = AndersenThermostatImpl::calcParticleGroups(system); vector<vector<int> > groups = AndersenThermostatImpl::calcParticleGroups(system);
atomGroups = new OpenCLArray<int>(cl, cl.getNumAtoms(), "atomGroups"); atomGroups = OpenCLArray::create<int>(cl, cl.getNumAtoms(), "atomGroups");
vector<int> atoms(atomGroups->getSize()); vector<int> atoms(atomGroups->getSize());
for (int i = 0; i < (int) groups.size(); i++) { for (int i = 0; i < (int) groups.size(); i++) {
for (int j = 0; j < (int) groups[i].size(); j++) for (int j = 0; j < (int) groups[i].size(); j++)
...@@ -4923,7 +4941,7 @@ OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() { ...@@ -4923,7 +4941,7 @@ OpenCLApplyMonteCarloBarostatKernel::~OpenCLApplyMonteCarloBarostatKernel() {
} }
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) { void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const MonteCarloBarostat& thermostat) {
savedPositions = new OpenCLArray<mm_float4>(cl, cl.getPaddedNumAtoms(), "savedPositions"); savedPositions = OpenCLArray::create<mm_float4>(cl, cl.getPaddedNumAtoms(), "savedPositions");
cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat); cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat);
kernel = cl::Kernel(program, "scalePositions"); kernel = cl::Kernel(program, "scalePositions");
} }
...@@ -4936,8 +4954,8 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -4936,8 +4954,8 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
vector<vector<int> > molecules = context.getMolecules(); vector<vector<int> > molecules = context.getMolecules();
numMolecules = molecules.size(); numMolecules = molecules.size();
moleculeAtoms = new OpenCLArray<int>(cl, cl.getNumAtoms(), "moleculeAtoms"); moleculeAtoms = OpenCLArray::create<int>(cl, cl.getNumAtoms(), "moleculeAtoms");
moleculeStartIndex = new OpenCLArray<int>(cl, numMolecules+1, "moleculeStartIndex"); moleculeStartIndex = OpenCLArray::create<int>(cl, numMolecules+1, "moleculeStartIndex");
vector<int> atoms(moleculeAtoms->getSize()); vector<int> atoms(moleculeAtoms->getSize());
vector<int> startIndex(moleculeStartIndex->getSize()); vector<int> startIndex(moleculeStartIndex->getSize());
int index = 0; int index = 0;
...@@ -4981,10 +4999,10 @@ double OpenCLCalcKineticEnergyKernel::execute(ContextImpl& context) { ...@@ -4981,10 +4999,10 @@ double OpenCLCalcKineticEnergyKernel::execute(ContextImpl& context) {
// We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy // We don't currently have a GPU kernel to do this, so we retrieve the velocities and calculate the energy
// on the CPU. // on the CPU.
OpenCLArray<mm_float4>& velm = cl.getVelm(); mm_float4* velm = (mm_float4*) cl.getPinnedBuffer();
velm.download(); cl.getVelm().download(velm);
double energy = 0.0; double energy = 0.0;
OpenCLArray<cl_int>& order = cl.getAtomIndex(); const vector<cl_int>& order = cl.getAtomIndex();
for (size_t i = 0; i < masses.size(); ++i) { for (size_t i = 0; i < masses.size(); ++i) {
mm_float4 v = velm[i]; mm_float4 v = velm[i];
energy += masses[order[i]]*(v.x*v.x+v.y*v.y+v.z*v.z); energy += masses[order[i]]*(v.x*v.x+v.y*v.y+v.z*v.z);
...@@ -5000,7 +5018,7 @@ OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() { ...@@ -5000,7 +5018,7 @@ OpenCLRemoveCMMotionKernel::~OpenCLRemoveCMMotionKernel() {
void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) { void OpenCLRemoveCMMotionKernel::initialize(const System& system, const CMMotionRemover& force) {
frequency = force.getFrequency(); frequency = force.getFrequency();
int numAtoms = cl.getNumAtoms(); int numAtoms = cl.getNumAtoms();
cmMomentum = new OpenCLArray<mm_float4>(cl, (numAtoms+OpenCLContext::ThreadBlockSize-1)/OpenCLContext::ThreadBlockSize, "cmMomentum"); cmMomentum = OpenCLArray::create<mm_float4>(cl, (numAtoms+OpenCLContext::ThreadBlockSize-1)/OpenCLContext::ThreadBlockSize, "cmMomentum");
double totalMass = 0.0; double totalMass = 0.0;
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
totalMass += system.getParticleMass(i); totalMass += system.getParticleMass(i);
......
...@@ -255,7 +255,7 @@ private: ...@@ -255,7 +255,7 @@ private:
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLArray<mm_float2>* params; OpenCLArray* params;
}; };
/** /**
...@@ -296,7 +296,7 @@ private: ...@@ -296,7 +296,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals; OpenCLArray* globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
}; };
...@@ -338,7 +338,7 @@ private: ...@@ -338,7 +338,7 @@ private:
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLArray<mm_float2>* params; OpenCLArray* params;
}; };
/** /**
...@@ -379,7 +379,7 @@ private: ...@@ -379,7 +379,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals; OpenCLArray* globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
}; };
...@@ -421,7 +421,7 @@ private: ...@@ -421,7 +421,7 @@ private:
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLArray<mm_float4>* params; OpenCLArray* params;
}; };
/** /**
...@@ -461,7 +461,7 @@ private: ...@@ -461,7 +461,7 @@ private:
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLArray<mm_float8>* params; OpenCLArray* params;
}; };
/** /**
...@@ -494,9 +494,9 @@ private: ...@@ -494,9 +494,9 @@ private:
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLArray<mm_float4>* coefficients; OpenCLArray* coefficients;
OpenCLArray<mm_int2>* mapPositions; OpenCLArray* mapPositions;
OpenCLArray<cl_int>* torsionMaps; OpenCLArray* torsionMaps;
}; };
/** /**
...@@ -537,7 +537,7 @@ private: ...@@ -537,7 +537,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals; OpenCLArray* globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
}; };
...@@ -591,18 +591,18 @@ private: ...@@ -591,18 +591,18 @@ private:
}; };
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernel; bool hasInitializedKernel;
OpenCLArray<mm_float2>* sigmaEpsilon; OpenCLArray* sigmaEpsilon;
OpenCLArray<mm_float4>* exceptionParams; OpenCLArray* exceptionParams;
OpenCLArray<mm_float2>* cosSinSums; OpenCLArray* cosSinSums;
OpenCLArray<mm_float2>* pmeGrid; OpenCLArray* pmeGrid;
OpenCLArray<mm_float2>* pmeGrid2; OpenCLArray* pmeGrid2;
OpenCLArray<cl_float>* pmeBsplineModuliX; OpenCLArray* pmeBsplineModuliX;
OpenCLArray<cl_float>* pmeBsplineModuliY; OpenCLArray* pmeBsplineModuliY;
OpenCLArray<cl_float>* pmeBsplineModuliZ; OpenCLArray* pmeBsplineModuliZ;
OpenCLArray<mm_float4>* pmeBsplineTheta; OpenCLArray* pmeBsplineTheta;
OpenCLArray<mm_float4>* pmeBsplineDTheta; OpenCLArray* pmeBsplineDTheta;
OpenCLArray<cl_int>* pmeAtomRange; OpenCLArray* pmeAtomRange;
OpenCLArray<mm_int2>* pmeAtomGridIndex; OpenCLArray* pmeAtomGridIndex;
OpenCLSort<SortTrait>* sort; OpenCLSort<SortTrait>* sort;
OpenCLFFT3D* fft; OpenCLFFT3D* fft;
cl::Kernel ewaldSumsKernel; cl::Kernel ewaldSumsKernel;
...@@ -658,11 +658,11 @@ public: ...@@ -658,11 +658,11 @@ public:
private: private:
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals; OpenCLArray* globals;
OpenCLArray<mm_float4>* tabulatedFunctionParams; OpenCLArray* tabulatedFunctionParams;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions; std::vector<OpenCLArray*> tabulatedFunctions;
System& system; System& system;
}; };
...@@ -704,13 +704,13 @@ private: ...@@ -704,13 +704,13 @@ private:
bool hasCreatedKernels; bool hasCreatedKernels;
int maxTiles; int maxTiles;
OpenCLContext& cl; OpenCLContext& cl;
OpenCLArray<mm_float2>* params; OpenCLArray* params;
OpenCLArray<cl_float>* bornSum; OpenCLArray* bornSum;
OpenCLArray<cl_long>* longBornSum; OpenCLArray* longBornSum;
OpenCLArray<cl_float>* bornRadii; OpenCLArray* bornRadii;
OpenCLArray<cl_float>* bornForce; OpenCLArray* bornForce;
OpenCLArray<cl_long>* longBornForce; OpenCLArray* longBornForce;
OpenCLArray<cl_float>* obcChain; OpenCLArray* obcChain;
cl::Kernel computeBornSumKernel; cl::Kernel computeBornSumKernel;
cl::Kernel reduceBornSumKernel; cl::Kernel reduceBornSumKernel;
cl::Kernel force1Kernel; cl::Kernel force1Kernel;
...@@ -757,14 +757,14 @@ private: ...@@ -757,14 +757,14 @@ private:
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLParameterSet* computedValues; OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs; OpenCLParameterSet* energyDerivs;
OpenCLArray<cl_long>* longEnergyDerivs; OpenCLArray* longEnergyDerivs;
OpenCLArray<cl_float>* globals; OpenCLArray* globals;
OpenCLArray<cl_float>* valueBuffers; OpenCLArray* valueBuffers;
OpenCLArray<cl_long>* longValueBuffers; OpenCLArray* longValueBuffers;
OpenCLArray<mm_float4>* tabulatedFunctionParams; OpenCLArray* tabulatedFunctionParams;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions; std::vector<OpenCLArray*> tabulatedFunctions;
std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue; std::vector<bool> pairValueUsesParam, pairEnergyUsesParam, pairEnergyUsesValue;
System& system; System& system;
cl::Kernel pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel; cl::Kernel pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel, gradientChainRuleKernel;
...@@ -808,7 +808,7 @@ private: ...@@ -808,7 +808,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
System& system; System& system;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals; OpenCLArray* globals;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
}; };
...@@ -853,17 +853,17 @@ private: ...@@ -853,17 +853,17 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* donorParams; OpenCLParameterSet* donorParams;
OpenCLParameterSet* acceptorParams; OpenCLParameterSet* acceptorParams;
OpenCLArray<cl_float>* globals; OpenCLArray* globals;
OpenCLArray<mm_int4>* donors; OpenCLArray* donors;
OpenCLArray<mm_int4>* acceptors; OpenCLArray* acceptors;
OpenCLArray<mm_int4>* donorBufferIndices; OpenCLArray* donorBufferIndices;
OpenCLArray<mm_int4>* acceptorBufferIndices; OpenCLArray* acceptorBufferIndices;
OpenCLArray<mm_int4>* donorExclusions; OpenCLArray* donorExclusions;
OpenCLArray<mm_int4>* acceptorExclusions; OpenCLArray* acceptorExclusions;
OpenCLArray<mm_float4>* tabulatedFunctionParams; OpenCLArray* tabulatedFunctionParams;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions; std::vector<OpenCLArray*> tabulatedFunctions;
System& system; System& system;
cl::Kernel donorKernel, acceptorKernel; cl::Kernel donorKernel, acceptorKernel;
}; };
...@@ -905,11 +905,11 @@ private: ...@@ -905,11 +905,11 @@ private:
int numBonds; int numBonds;
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray<cl_float>* globals; OpenCLArray* globals;
OpenCLArray<mm_float4>* tabulatedFunctionParams; OpenCLArray* tabulatedFunctionParams;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions; std::vector<OpenCLArray*> tabulatedFunctions;
System& system; System& system;
}; };
...@@ -970,7 +970,7 @@ private: ...@@ -970,7 +970,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
double prevTemp, prevFriction, prevStepSize; double prevTemp, prevFriction, prevStepSize;
bool hasInitializedKernels; bool hasInitializedKernels;
OpenCLArray<cl_float>* params; OpenCLArray* params;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
...@@ -1065,7 +1065,7 @@ private: ...@@ -1065,7 +1065,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int blockSize; int blockSize;
OpenCLArray<cl_float>* params; OpenCLArray* params;
cl::Kernel kernel1, kernel2, selectSizeKernel; cl::Kernel kernel1, kernel2, selectSizeKernel;
double prevTemp, prevFriction, prevErrorTol; double prevTemp, prevFriction, prevErrorTol;
}; };
...@@ -1138,14 +1138,15 @@ private: ...@@ -1138,14 +1138,15 @@ private:
int numGlobalVariables; int numGlobalVariables;
bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters; bool hasInitializedKernels, deviceValuesAreCurrent, modifiesParameters;
mutable bool localValuesAreCurrent; mutable bool localValuesAreCurrent;
OpenCLArray<cl_float>* globalValues; OpenCLArray* globalValues;
OpenCLArray<cl_float>* contextParameterValues; OpenCLArray* contextParameterValues;
OpenCLArray<cl_float>* sumBuffer; OpenCLArray* sumBuffer;
OpenCLArray<cl_float>* energy; OpenCLArray* energy;
OpenCLArray<mm_float4>* uniformRandoms; OpenCLArray* uniformRandoms;
OpenCLArray<mm_int4>* randomSeed; OpenCLArray* randomSeed;
OpenCLParameterSet* perDofValues; OpenCLParameterSet* perDofValues;
mutable std::vector<std::vector<cl_float> > localPerDofValues; mutable std::vector<std::vector<cl_float> > localPerDofValues;
std::vector<float> contextValues;
std::vector<std::vector<cl::Kernel> > kernels; std::vector<std::vector<cl::Kernel> > kernels;
cl::Kernel sumEnergyKernel, randomKernel; cl::Kernel sumEnergyKernel, randomKernel;
std::vector<CustomIntegrator::ComputationType> stepType; std::vector<CustomIntegrator::ComputationType> stepType;
...@@ -1185,7 +1186,7 @@ private: ...@@ -1185,7 +1186,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int randomSeed; int randomSeed;
OpenCLArray<cl_int>* atomGroups; OpenCLArray* atomGroups;
cl::Kernel kernel; cl::Kernel kernel;
}; };
...@@ -1226,9 +1227,9 @@ private: ...@@ -1226,9 +1227,9 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int numMolecules; int numMolecules;
OpenCLArray<mm_float4>* savedPositions; OpenCLArray* savedPositions;
OpenCLArray<cl_int>* moleculeAtoms; OpenCLArray* moleculeAtoms;
OpenCLArray<cl_int>* moleculeStartIndex; OpenCLArray* moleculeStartIndex;
cl::Kernel kernel; cl::Kernel kernel;
}; };
...@@ -1280,7 +1281,7 @@ public: ...@@ -1280,7 +1281,7 @@ public:
private: private:
OpenCLContext& cl; OpenCLContext& cl;
int frequency; int frequency;
OpenCLArray<mm_float4>* cmMomentum; OpenCLArray* cmMomentum;
cl::Kernel kernel1, kernel2; cl::Kernel kernel1, kernel2;
}; };
......
...@@ -191,14 +191,14 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -191,14 +191,14 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
exclusionIndicesVec.push_back(iter->second); exclusionIndicesVec.push_back(iter->second);
} }
exclusionRowIndicesVec[++currentRow] = exclusionIndicesVec.size(); exclusionRowIndicesVec[++currentRow] = exclusionIndicesVec.size();
exclusionIndices = new OpenCLArray<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices"); exclusionIndices = OpenCLArray::create<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices");
exclusionRowIndices = new OpenCLArray<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices"); exclusionRowIndices = OpenCLArray::create<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices");
exclusionIndices->upload(exclusionIndicesVec); exclusionIndices->upload(exclusionIndicesVec);
exclusionRowIndices->upload(exclusionRowIndicesVec); exclusionRowIndices->upload(exclusionRowIndicesVec);
// Record the exclusion data. // Record the exclusion data.
exclusions = new OpenCLArray<cl_uint>(context, tilesWithExclusions.size()*OpenCLContext::TileSize, "exclusions"); exclusions = OpenCLArray::create<cl_uint>(context, tilesWithExclusions.size()*OpenCLContext::TileSize, "exclusions");
vector<cl_uint> exclusionVec(exclusions->getSize()); vector<cl_uint> exclusionVec(exclusions->getSize());
for (int i = 0; i < exclusions->getSize(); ++i) for (int i = 0; i < exclusions->getSize(); ++i)
exclusionVec[i] = 0xFFFFFFFF; exclusionVec[i] = 0xFFFFFFFF;
...@@ -253,13 +253,13 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -253,13 +253,13 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
maxInteractingTiles = numTiles; maxInteractingTiles = numTiles;
if (maxInteractingTiles < 1) if (maxInteractingTiles < 1)
maxInteractingTiles = 1; maxInteractingTiles = 1;
interactingTiles = new OpenCLArray<mm_ushort2>(context, maxInteractingTiles, "interactingTiles"); interactingTiles = OpenCLArray::create<mm_ushort2>(context, maxInteractingTiles, "interactingTiles");
interactionFlags = new OpenCLArray<cl_uint>(context, context.getSIMDWidth() == 32 ? maxInteractingTiles : (deviceIsCpu ? 2*maxInteractingTiles : 1), "interactionFlags"); interactionFlags = OpenCLArray::create<cl_uint>(context, context.getSIMDWidth() == 32 ? maxInteractingTiles : (deviceIsCpu ? 2*maxInteractingTiles : 1), "interactionFlags");
interactionCount = new OpenCLArray<cl_uint>(context, 1, "interactionCount", true); interactionCount = OpenCLArray::create<cl_uint>(context, 1, "interactionCount");
blockCenter = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockCenter"); blockCenter = OpenCLArray::create<mm_float4>(context, numAtomBlocks, "blockCenter");
blockBoundingBox = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockBoundingBox"); blockBoundingBox = OpenCLArray::create<mm_float4>(context, numAtomBlocks, "blockBoundingBox");
interactionCount->set(0, 0); vector<cl_uint> count(1, 0);
interactionCount->upload(); interactionCount->upload(count);
} }
// Create kernels. // Create kernels.
...@@ -353,26 +353,27 @@ void OpenCLNonbondedUtilities::computeInteractions() { ...@@ -353,26 +353,27 @@ void OpenCLNonbondedUtilities::computeInteractions() {
void OpenCLNonbondedUtilities::updateNeighborListSize() { void OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return; return;
interactionCount->download(); unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer();
if (interactionCount->get(0) <= (unsigned int) interactingTiles->getSize()) interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) interactingTiles->getSize())
return; return;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent // The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future. // this from happening in the future.
int newSize = (int) (1.2*interactionCount->get(0)); int newSize = (int) (1.2*pinnedInteractionCount[0]);
int numTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; int numTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (newSize > numTiles) if (newSize > numTiles)
newSize = numTiles; newSize = numTiles;
delete interactingTiles; delete interactingTiles;
interactingTiles = new OpenCLArray<mm_ushort2>(context, newSize, "interactingTiles"); interactingTiles = OpenCLArray::create<mm_ushort2>(context, newSize, "interactingTiles");
forceKernel.setArg<cl::Buffer>(8, interactingTiles->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(8, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(12, newSize); forceKernel.setArg<cl_uint>(12, newSize);
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(9, newSize); findInteractingBlocksKernel.setArg<cl_uint>(9, newSize);
if (context.getSIMDWidth() == 32 || deviceIsCpu) { if (context.getSIMDWidth() == 32 || deviceIsCpu) {
delete interactionFlags; delete interactionFlags;
interactionFlags = new OpenCLArray<cl_uint>(context, deviceIsCpu ? 2*newSize : newSize, "interactionFlags"); interactionFlags = OpenCLArray::create<cl_uint>(context, deviceIsCpu ? 2*newSize : newSize, "interactionFlags");
forceKernel.setArg<cl::Buffer>(13, interactionFlags->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(13, interactionFlags->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactionFlags->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactionFlags->getDeviceBuffer());
if (!deviceIsCpu) { if (!deviceIsCpu) {
......
...@@ -170,49 +170,49 @@ public: ...@@ -170,49 +170,49 @@ public:
/** /**
* Get the array containing the center of each atom block. * Get the array containing the center of each atom block.
*/ */
OpenCLArray<mm_float4>& getBlockCenters() { OpenCLArray& getBlockCenters() {
return *blockCenter; return *blockCenter;
} }
/** /**
* Get the array containing the dimensions of each atom block. * Get the array containing the dimensions of each atom block.
*/ */
OpenCLArray<mm_float4>& getBlockBoundingBoxes() { OpenCLArray& getBlockBoundingBoxes() {
return *blockBoundingBox; return *blockBoundingBox;
} }
/** /**
* Get the array whose first element contains the number of tiles with interactions. * Get the array whose first element contains the number of tiles with interactions.
*/ */
OpenCLArray<cl_uint>& getInteractionCount() { OpenCLArray& getInteractionCount() {
return *interactionCount; return *interactionCount;
} }
/** /**
* Get the array containing tiles with interactions. * Get the array containing tiles with interactions.
*/ */
OpenCLArray<mm_ushort2>& getInteractingTiles() { OpenCLArray& getInteractingTiles() {
return *interactingTiles; return *interactingTiles;
} }
/** /**
* Get the array containing flags for tiles with interactions. * Get the array containing flags for tiles with interactions.
*/ */
OpenCLArray<cl_uint>& getInteractionFlags() { OpenCLArray& getInteractionFlags() {
return *interactionFlags; return *interactionFlags;
} }
/** /**
* Get the array containing exclusion flags. * Get the array containing exclusion flags.
*/ */
OpenCLArray<cl_uint>& getExclusions() { OpenCLArray& getExclusions() {
return *exclusions; return *exclusions;
} }
/** /**
* Get the array containing the index into the exclusion array for each tile. * Get the array containing the index into the exclusion array for each tile.
*/ */
OpenCLArray<cl_uint>& getExclusionIndices() { OpenCLArray& getExclusionIndices() {
return *exclusionIndices; return *exclusionIndices;
} }
/** /**
* Get the array listing where the exclusion data starts for each row. * Get the array listing where the exclusion data starts for each row.
*/ */
OpenCLArray<cl_uint>& getExclusionRowIndices() { OpenCLArray& getExclusionRowIndices() {
return *exclusionRowIndices; return *exclusionRowIndices;
} }
/** /**
...@@ -250,14 +250,14 @@ private: ...@@ -250,14 +250,14 @@ private:
cl::Kernel findBlockBoundsKernel; cl::Kernel findBlockBoundsKernel;
cl::Kernel findInteractingBlocksKernel; cl::Kernel findInteractingBlocksKernel;
cl::Kernel findInteractionsWithinBlocksKernel; cl::Kernel findInteractionsWithinBlocksKernel;
OpenCLArray<cl_uint>* exclusions; OpenCLArray* exclusions;
OpenCLArray<cl_uint>* exclusionIndices; OpenCLArray* exclusionIndices;
OpenCLArray<cl_uint>* exclusionRowIndices; OpenCLArray* exclusionRowIndices;
OpenCLArray<mm_ushort2>* interactingTiles; OpenCLArray* interactingTiles;
OpenCLArray<cl_uint>* interactionFlags; OpenCLArray* interactionFlags;
OpenCLArray<cl_uint>* interactionCount; OpenCLArray* interactionCount;
OpenCLArray<mm_float4>* blockCenter; OpenCLArray* blockCenter;
OpenCLArray<mm_float4>* blockBoundingBox; OpenCLArray* blockBoundingBox;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments; std::vector<ParameterInfo> arguments;
......
...@@ -130,8 +130,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -130,8 +130,8 @@ void OpenCLParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void OpenCLParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
OpenCLContext& cl0 = *data.contexts[0]; OpenCLContext& cl0 = *data.contexts[0];
if (contextForces == NULL) { if (contextForces == NULL) {
contextForces = new OpenCLArray<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(), contextForces = OpenCLArray::create<mm_float4>(cl0, &cl0.getForceBuffers().getDeviceBuffer(),
data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces", true); data.contexts.size()*cl0.getPaddedNumAtoms(), "contextForces");
int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*sizeof(mm_float4); int bufferBytes = (data.contexts.size()-1)*cl0.getPaddedNumAtoms()*sizeof(mm_float4);
pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes); pinnedPositionBuffer = new cl::Buffer(cl0.getContext(), CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedPositionMemory = (mm_float4*) cl0.getQueue().enqueueMapBuffer(*pinnedPositionBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes); pinnedPositionMemory = (mm_float4*) cl0.getQueue().enqueueMapBuffer(*pinnedPositionBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment