"platforms/hip/tests/TestHipRGForce.cpp" did not exist on "3bcd80b397b7ea640fc0444b428933f20d62dbbb"
Unverified Commit dd320bcf authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Unified interface for queues (#4913)

* Unified interface for queues

* Simplified stream handling in CudaFFT3D

* HIP implementation of ComputeQueue
parent baf7942c
#ifndef OPENMM_OPENCLQUEUE_H_
#define OPENMM_OPENCLQUEUE_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2025 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/ComputeQueue.h"
#include "opencl.hpp"
namespace OpenMM {
/**
 * This is the OpenCL implementation of the ComputeQueue interface.  Each instance
 * stores one cl::CommandQueue and returns it on request.
 */
class OpenCLQueue : public ComputeQueueImpl {
public:
    /**
     * Create an OpenCLQueue that wraps a cl::CommandQueue.
     *
     * @param queue    the command queue to wrap; a copy of this object is stored
     */
    OpenCLQueue(cl::CommandQueue queue) : queue(queue) {
    }
    /**
     * Get the wrapped cl::CommandQueue.  This method does not modify the object,
     * so it is declared const and can be called through a const OpenCLQueue.
     *
     * @return a copy of the stored cl::CommandQueue
     */
    cl::CommandQueue getQueue() const {
        return queue;
    }
private:
    /** The command queue passed to the constructor. */
    cl::CommandQueue queue;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLQUEUE_H_*/
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Portions copyright (c) 2012-2025 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -26,6 +26,7 @@
#include "OpenCLArray.h"
#include "OpenCLContext.h"
#include "OpenCLQueue.h"
#include <iostream>
#include <sstream>
#include <vector>
......@@ -96,13 +97,17 @@ ComputeContext& OpenCLArray::getContext() {
return *context;
}
/**
 * Get the cl::CommandQueue wrapped by the context's current queue.
 *
 * @return the cl::CommandQueue held by the current OpenCLQueue
 * @throws OpenMMException if the current queue is not an OpenCLQueue
 */
cl::CommandQueue OpenCLArray::getQueue() const {
    // Bind the result first so the raw pointer below stays valid whether
    // getCurrentQueue() returns by value or by reference.
    const auto& current = context->getCurrentQueue();
    OpenCLQueue* clQueue = dynamic_cast<OpenCLQueue*>(current.get());
    // dynamic_cast yields a null pointer when the object is missing or of another type;
    // report that instead of dereferencing it.
    if (clQueue == NULL)
        throw OpenMMException("OpenCLArray: the current queue is not an OpenCLQueue");
    return clQueue->getQueue();
}
void OpenCLArray::uploadSubArray(const void* data, int offset, int elements, bool blocking) {
if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized");
if (offset < 0 || offset+elements > getSize())
throw OpenMMException("uploadSubArray: data exceeds range of array");
try {
context->getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, offset*elementSize, elements*elementSize, data);
getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, offset*elementSize, elements*elementSize, data);
}
catch (cl::Error err) {
std::stringstream str;
......@@ -115,7 +120,7 @@ void OpenCLArray::download(void* data, bool blocking) const {
if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized");
try {
context->getQueue().enqueueReadBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
getQueue().enqueueReadBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
}
catch (cl::Error err) {
std::stringstream str;
......@@ -131,7 +136,7 @@ void OpenCLArray::copyTo(ArrayInterface& dest) const {
throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array");
OpenCLArray& clDest = context->unwrap(dest);
try {
context->getQueue().enqueueCopyBuffer(*buffer, clDest.getDeviceBuffer(), 0, 0, size*elementSize);
getQueue().enqueueCopyBuffer(*buffer, clDest.getDeviceBuffer(), 0, 0, size*elementSize);
}
catch (cl::Error err) {
std::stringstream str;
......
......@@ -37,6 +37,7 @@
#include "OpenCLKernelSources.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLProgram.h"
#include "OpenCLQueue.h"
#include "openmm/common/ComputeArray.h"
#include "openmm/MonteCarloFlexibleBarostat.h"
#include "openmm/Platform.h"
......@@ -302,10 +303,10 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
if (originalContext == NULL) {
context = cl::Context(contextDevices, cprops, errorCallback);
#ifdef ENABLE_PROFILING
defaultQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
defaultQueue = shared_ptr<ComputeQueueImpl>(new OpenCLQueue(cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE)));
printf("[ ");
#else
defaultQueue = cl::CommandQueue(context, device);
defaultQueue = shared_ptr<ComputeQueueImpl>(new OpenCLQueue(cl::CommandQueue(context, device)));
#endif
}
else {
......@@ -559,7 +560,7 @@ void OpenCLContext::initialize() {
energyBufferSize*energyBuffer.getElementSize()),
(int) longForceBuffer.getSize()*longForceBuffer.getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
pinnedMemory = getQueue().enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i);
if (useDoublePrecision || useMixedPrecision)
......@@ -670,16 +671,12 @@ double& OpenCLContext::getEnergyWorkspace() {
return platformData.contextEnergy[contextIndex];
}
cl::CommandQueue& OpenCLContext::getQueue() {
return currentQueue;
/**
 * Create a new queue on this context's device, wrapped in an OpenCLQueue.
 *
 * When ENABLE_PROFILING is defined the queue is created with CL_QUEUE_PROFILING_ENABLE,
 * matching how the default queue is created.  executeKernel() records an event for every
 * kernel launched on the current queue, so a queue made here can also produce events.
 * NOTE(review): this assumes profiling data is later read from those events — confirm.
 *
 * @return the newly created queue
 */
ComputeQueue OpenCLContext::createQueue() {
#ifdef ENABLE_PROFILING
    return shared_ptr<ComputeQueueImpl>(new OpenCLQueue(cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE)));
#else
    return shared_ptr<ComputeQueueImpl>(new OpenCLQueue(cl::CommandQueue(context, device)));
#endif
}
void OpenCLContext::setQueue(cl::CommandQueue& queue) {
currentQueue = queue;
}
void OpenCLContext::restoreDefaultQueue() {
currentQueue = defaultQueue;
/**
 * Get the cl::CommandQueue wrapped by the current queue.
 *
 * @return the cl::CommandQueue held by the current OpenCLQueue
 * @throws OpenMMException if the current queue is not an OpenCLQueue
 */
cl::CommandQueue OpenCLContext::getQueue() {
    OpenCLQueue* clQueue = dynamic_cast<OpenCLQueue*>(currentQueue.get());
    // dynamic_cast yields a null pointer when the object is missing or of another type;
    // report that instead of dereferencing it.
    if (clQueue == NULL)
        throw OpenMMException("OpenCLContext: the current queue is not an OpenCLQueue");
    return clQueue->getQueue();
}
OpenCLArray* OpenCLContext::createArray() {
......@@ -714,13 +711,13 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
try {
#ifdef ENABLE_PROFILING
cl::Event event;
currentQueue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize), NULL, &event);
getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize), NULL, &event);
profilingEvents.push_back(event);
profilingKernelNames.push_back(kernel.getInfo<CL_KERNEL_FUNCTION_NAME>());
if (profilingEvents.size() >= 500)
printProfilingEvents();
#else
currentQueue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize));
getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize));
#endif
}
catch (cl::Error err) {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Portions copyright (c) 2019-2025 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */
#include "OpenCLEvent.h"
#include "OpenCLQueue.h"
using namespace OpenMM;
......@@ -32,7 +33,7 @@ OpenCLEvent::OpenCLEvent(OpenCLContext& context) : context(context) {
}
void OpenCLEvent::enqueue() {
context.getQueue().enqueueMarkerWithWaitList(NULL, &event);
dynamic_cast<OpenCLQueue*>(context.getCurrentQueue().get())->getQueue().enqueueMarkerWithWaitList(NULL, &event);
}
void OpenCLEvent::wait() {
......
......@@ -35,6 +35,7 @@
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLKernelSources.h"
#include "OpenCLQueue.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include <algorithm>
......@@ -222,18 +223,18 @@ private:
class OpenCLCalcNonbondedForceKernel::SyncQueuePreComputation : public OpenCLContext::ForcePreComputation {
public:
SyncQueuePreComputation(OpenCLContext& cl, cl::CommandQueue queue, int forceGroup) : cl(cl), queue(queue), forceGroup(forceGroup) {
SyncQueuePreComputation(OpenCLContext& cl, ComputeQueue queue, int forceGroup) : cl(cl), queue(queue), forceGroup(forceGroup) {
}
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1<<forceGroup)) != 0) {
vector<cl::Event> events(1);
cl.getQueue().enqueueMarkerWithWaitList(NULL, &events[0]);
queue.enqueueBarrierWithWaitList(&events);
dynamic_cast<OpenCLQueue*>(queue.get())->getQueue().enqueueBarrierWithWaitList(&events);
}
}
private:
OpenCLContext& cl;
cl::CommandQueue queue;
ComputeQueue queue;
int forceGroup;
};
......@@ -523,7 +524,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
usePmeQueue = (!cl.getPlatformData().disablePmeStream && !cl.getPlatformData().useCpuPme && isNvidia);
if (usePmeQueue) {
pmeDefines["USE_PME_STREAM"] = "1";
pmeQueue = cl::CommandQueue(cl.getContext(), cl.getDevice());
pmeQueue = cl.createQueue();
int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
......@@ -941,7 +942,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (usePmeQueue) {
vector<cl::Event> events(1);
cl.getQueue().enqueueMarkerWithWaitList(NULL, &events[0]);
pmeQueue.enqueueBarrierWithWaitList(&events);
dynamic_cast<OpenCLQueue*>(pmeQueue.get())->getQueue().enqueueBarrierWithWaitList(&events);
}
if (hasOffsets) {
// The Ewald self energy was computed in the kernel.
......@@ -979,7 +980,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
}
if (pmeGrid1.isInitialized() && includeReciprocal) {
if (usePmeQueue && !includeEnergy)
cl.setQueue(pmeQueue);
cl.setCurrentQueue(pmeQueue);
// Invert the periodic box vectors.
......@@ -1131,7 +1132,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionInterpolateForceKernel, cl.getNumAtoms());
}
if (usePmeQueue) {
pmeQueue.enqueueMarkerWithWaitList(NULL, &pmeSyncEvent);
dynamic_cast<OpenCLQueue*>(pmeQueue.get())->getQueue().enqueueMarkerWithWaitList(NULL, &pmeSyncEvent);
cl.restoreDefaultQueue();
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment