Commit 94aa8c3f authored by peastman's avatar peastman
Browse files

OpenCL platform does PME on a separate stream

parent 832e7f04
......@@ -211,11 +211,17 @@ public:
return contextIndex;
}
/**
* Get the cl::CommandQueue associated with this object.
* Get the cl::CommandQueue currently being used for execution.
*/
cl::CommandQueue& getQueue() {
// Hands back a reference to the context's own queue member, not a copy of it.
return queue;
}
cl::CommandQueue& getQueue();
/**
* Set the cl::CommandQueue to use for execution.
*/
void setQueue(cl::CommandQueue& queue);
/**
* Reset the context to using the default queue for execution.
*/
void restoreDefaultQueue();
/**
* Get the array which contains the position (the xyz components) and charge (the w component) of each atom.
*/
......@@ -629,7 +635,7 @@ private:
std::map<std::string, std::string> compilationDefines;
cl::Context context;
cl::Device device;
cl::CommandQueue queue;
cl::CommandQueue defaultQueue, currentQueue;
cl::Kernel clearBufferKernel;
cl::Kernel clearTwoBuffersKernel;
cl::Kernel clearThreeBuffersKernel;
......
......@@ -599,6 +599,8 @@ private:
class PmeIO;
class PmePreComputation;
class PmePostComputation;
class SyncQueuePreComputation;
class SyncQueuePostComputation;
OpenCLContext& cl;
bool hasInitializedKernel;
OpenCLArray* sigmaEpsilon;
......@@ -613,6 +615,8 @@ private:
OpenCLArray* pmeAtomRange;
OpenCLArray* pmeAtomGridIndex;
OpenCLSort* sort;
cl::CommandQueue pmeQueue;
cl::Event pmeSyncEvent;
OpenCLFFT3D* fft;
Kernel cpuPme;
PmeIO* pmeio;
......
......@@ -248,7 +248,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
contextDevices.push_back(device);
cl_context_properties cprops[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platforms[bestPlatform](), 0};
context = cl::Context(contextDevices, cprops, errorCallback);
queue = cl::CommandQueue(context, device);
defaultQueue = cl::CommandQueue(context, device);
currentQueue = defaultQueue;
numAtoms = system.getNumParticles();
paddedNumAtoms = TileSize*((numAtoms+TileSize-1)/TileSize);
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
......@@ -414,7 +415,7 @@ void OpenCLContext::initialize() {
addAutoclearBuffer(*energyBuffer);
int bufferBytes = max(velm->getSize()*velm->getElementSize(), energyBuffer->getSize()*energyBuffer->getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = queue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i);
if (useDoublePrecision || useMixedPrecision)
......@@ -514,6 +515,18 @@ cl::Program OpenCLContext::createProgram(const string source, const map<string,
return program;
}
/**
 * Get the queue this context is currently using for execution.
 *
 * This is the same member that executeKernel() enqueues kernels on. It is
 * returned by reference, so callers work with the context's own queue object.
 */
cl::CommandQueue& OpenCLContext::getQueue() {
return currentQueue;
}
/**
 * Switch the context to a different queue for execution.
 *
 * The argument is assigned into currentQueue, so later calls to getQueue()
 * and executeKernel() use it until the queue is changed again. The default
 * queue is left untouched and can be reinstated with restoreDefaultQueue().
 *
 * @param queue  the queue to use from now on
 */
void OpenCLContext::setQueue(cl::CommandQueue& queue) {
currentQueue = queue;
}
/**
 * Go back to executing on the default queue, i.e. the one created when the
 * context was constructed. This undoes any earlier call to setQueue().
 */
void OpenCLContext::restoreDefaultQueue() {
currentQueue = defaultQueue;
}
string OpenCLContext::doubleToString(double value) {
stringstream s;
s.precision(useDoublePrecision ? 16 : 8);
......@@ -534,7 +547,7 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
blockSize = ThreadBlockSize;
int size = std::min((workUnits+blockSize-1)/blockSize, numThreadBlocks)*blockSize;
try {
queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize));
currentQueue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize));
}
catch (cl::Error err) {
stringstream str;
......
......@@ -1384,6 +1384,35 @@ private:
CalcPmeReciprocalForceKernel::IO& io;
};
/**
 * Pre-computation that orders a secondary queue after the context's current queue.
 *
 * Each invocation drops a marker on whichever queue the context is currently
 * using and then tells the secondary queue to wait for that marker.
 */
class OpenCLCalcNonbondedForceKernel::SyncQueuePreComputation : public OpenCLContext::ForcePreComputation {
public:
    /**
     * @param cl     the context whose current queue is the one to wait for
     * @param queue  the secondary queue that has to wait (stored by value)
     */
    SyncQueuePreComputation(OpenCLContext& cl, cl::CommandQueue queue) : context(cl), waitingQueue(queue), markerEvents(1) {
    }
    /**
     * Enqueue the marker and the matching wait. The arguments are not used.
     */
    void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
        // Look the queue up on every call: the context's current queue can be changed.
        cl::CommandQueue& activeQueue = context.getQueue();
        activeQueue.enqueueMarker(&markerEvents[0]);
        waitingQueue.enqueueWaitForEvents(markerEvents);
    }
private:
    OpenCLContext& context;
    cl::CommandQueue waitingQueue;
    // One-element wait list, reused on every call to hold the latest marker event.
    vector<cl::Event> markerEvents;
};
/**
 * Post-computation that makes the context's current queue wait for an event
 * owned by someone else.
 *
 * The event is held by reference, so every invocation sees whatever the owner
 * stored in it most recently.
 */
class OpenCLCalcNonbondedForceKernel::SyncQueuePostComputation : public OpenCLContext::ForcePostComputation {
public:
    /**
     * @param cl     the context whose current queue should wait
     * @param event  the event to wait on; it is referenced, not copied, so it
     *               must outlive this object
     */
    SyncQueuePostComputation(OpenCLContext& cl, cl::Event& event) : cl(cl), event(event), events(1) {
    }
    /**
     * Enqueue a wait for the event on the context's current queue.
     * The arguments are not used.
     *
     * @return always 0.0; this step contributes no energy
     */
    double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
        // The event is only assigned once a marker has been enqueued for it.
        // Until then it holds no OpenCL handle, and a wait list containing
        // such an event is rejected by the runtime. There is nothing to wait
        // for in that case, so skip the synchronization.
        if (event() == NULL)
            return 0.0;
        events[0] = event;
        cl.getQueue().enqueueWaitForEvents(events);
        return 0.0;
    }
private:
    OpenCLContext& cl;
    cl::Event& event;
    // One-element wait list, refreshed from `event` on every call.
    vector<cl::Event> events;
};
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
......@@ -1574,6 +1603,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ);
pmeQueue = cl::CommandQueue(cl.getContext(), cl.getDevice());
cl.addPreComputation(new SyncQueuePreComputation(cl, pmeQueue));
cl.addPostComputation(new SyncQueuePostComputation(cl, pmeSyncEvent));
// Initialize the b-spline moduli.
......@@ -1753,6 +1785,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
}
if (pmeGrid != NULL && includeReciprocal) {
cl.setQueue(pmeQueue);
setPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 4);
setInvPeriodicBoxSizeArg(cl, pmeUpdateBsplinesKernel, 5);
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
......@@ -1795,6 +1828,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
else
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms());
pmeQueue.enqueueMarker(&pmeSyncEvent);
cl.restoreDefaultQueue();
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (dispersionCoefficient != 0.0 && includeDirect) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment