"platforms/cuda/tests/TestCudaPythonForce.cpp" did not exist on "29e3fa57628302b218bf9d1d1c3a821065293a34"
Unverified Commit dd320bcf authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Unified interface for queues (#4913)

* Unified interface for queues

* Simplified stream handling in CudaFFT3D

* HIP implementation of ComputeQueue
parent baf7942c
#ifndef OPENMM_OPENCLQUEUE_H_
#define OPENMM_OPENCLQUEUE_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2025 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/common/ComputeQueue.h"
#include "opencl.hpp"
namespace OpenMM {
/**
 * This is the OpenCL implementation of the ComputeQueue interface.  It wraps a cl::CommandQueue.
 */
class OpenCLQueue : public ComputeQueueImpl {
public:
    /**
     * Create an OpenCLQueue that wraps a cl::CommandQueue.
     *
     * @param queue    the command queue to wrap.  This object stores its own copy of the handle.
     */
    OpenCLQueue(cl::CommandQueue queue) : queue(queue) {
    }
    /**
     * Get the cl::CommandQueue.
     *
     * This method does not modify the object, so it is declared const.  That allows
     * it to be called through a const OpenCLQueue as well as a non-const one.
     *
     * @return a copy of the wrapped cl::CommandQueue handle
     */
    cl::CommandQueue getQueue() const {
        return queue;
    }
private:
    cl::CommandQueue queue;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLQUEUE_H_*/
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2012-2022 Stanford University and the Authors. * * Portions copyright (c) 2012-2025 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "OpenCLArray.h" #include "OpenCLArray.h"
#include "OpenCLContext.h" #include "OpenCLContext.h"
#include "OpenCLQueue.h"
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
...@@ -96,13 +97,17 @@ ComputeContext& OpenCLArray::getContext() { ...@@ -96,13 +97,17 @@ ComputeContext& OpenCLArray::getContext() {
return *context; return *context;
} }
/**
 * Get the cl::CommandQueue that operations on this array should be enqueued on.
 * This is the queue wrapped by the context's current queue.
 *
 * @return the cl::CommandQueue held by the context's current queue
 * @throws OpenMMException if the context's current queue is not an OpenCLQueue
 */
cl::CommandQueue OpenCLArray::getQueue() const {
    // Hold on to the queue handle for the duration of this call so the pointer
    // obtained from it below cannot outlive the object it points to.
    auto current = context->getCurrentQueue();
    // dynamic_cast yields a null pointer if the current queue is empty or is not an
    // OpenCLQueue.  Report that as an exception instead of dereferencing it.
    OpenCLQueue* clQueue = dynamic_cast<OpenCLQueue*>(current.get());
    if (clQueue == NULL)
        throw OpenMMException("OpenCLArray: the current queue is not an OpenCL queue");
    return clQueue->getQueue();
}
void OpenCLArray::uploadSubArray(const void* data, int offset, int elements, bool blocking) { void OpenCLArray::uploadSubArray(const void* data, int offset, int elements, bool blocking) {
if (buffer == NULL) if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized"); throw OpenMMException("OpenCLArray has not been initialized");
if (offset < 0 || offset+elements > getSize()) if (offset < 0 || offset+elements > getSize())
throw OpenMMException("uploadSubArray: data exceeds range of array"); throw OpenMMException("uploadSubArray: data exceeds range of array");
try { try {
context->getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, offset*elementSize, elements*elementSize, data); getQueue().enqueueWriteBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, offset*elementSize, elements*elementSize, data);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
...@@ -115,7 +120,7 @@ void OpenCLArray::download(void* data, bool blocking) const { ...@@ -115,7 +120,7 @@ void OpenCLArray::download(void* data, bool blocking) const {
if (buffer == NULL) if (buffer == NULL)
throw OpenMMException("OpenCLArray has not been initialized"); throw OpenMMException("OpenCLArray has not been initialized");
try { try {
context->getQueue().enqueueReadBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data); getQueue().enqueueReadBuffer(*buffer, blocking ? CL_TRUE : CL_FALSE, 0, size*elementSize, data);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
...@@ -131,7 +136,7 @@ void OpenCLArray::copyTo(ArrayInterface& dest) const { ...@@ -131,7 +136,7 @@ void OpenCLArray::copyTo(ArrayInterface& dest) const {
throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array"); throw OpenMMException("Error copying array "+name+" to "+dest.getName()+": The destination array does not match the size of the array");
OpenCLArray& clDest = context->unwrap(dest); OpenCLArray& clDest = context->unwrap(dest);
try { try {
context->getQueue().enqueueCopyBuffer(*buffer, clDest.getDeviceBuffer(), 0, 0, size*elementSize); getQueue().enqueueCopyBuffer(*buffer, clDest.getDeviceBuffer(), 0, 0, size*elementSize);
} }
catch (cl::Error err) { catch (cl::Error err) {
std::stringstream str; std::stringstream str;
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "OpenCLKernelSources.h" #include "OpenCLKernelSources.h"
#include "OpenCLNonbondedUtilities.h" #include "OpenCLNonbondedUtilities.h"
#include "OpenCLProgram.h" #include "OpenCLProgram.h"
#include "OpenCLQueue.h"
#include "openmm/common/ComputeArray.h" #include "openmm/common/ComputeArray.h"
#include "openmm/MonteCarloFlexibleBarostat.h" #include "openmm/MonteCarloFlexibleBarostat.h"
#include "openmm/Platform.h" #include "openmm/Platform.h"
...@@ -302,10 +303,10 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device ...@@ -302,10 +303,10 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
if (originalContext == NULL) { if (originalContext == NULL) {
context = cl::Context(contextDevices, cprops, errorCallback); context = cl::Context(contextDevices, cprops, errorCallback);
#ifdef ENABLE_PROFILING #ifdef ENABLE_PROFILING
defaultQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); defaultQueue = shared_ptr<ComputeQueueImpl>(new OpenCLQueue(cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE)));
printf("[ "); printf("[ ");
#else #else
defaultQueue = cl::CommandQueue(context, device); defaultQueue = shared_ptr<ComputeQueueImpl>(new OpenCLQueue(cl::CommandQueue(context, device)));
#endif #endif
} }
else { else {
...@@ -559,7 +560,7 @@ void OpenCLContext::initialize() { ...@@ -559,7 +560,7 @@ void OpenCLContext::initialize() {
energyBufferSize*energyBuffer.getElementSize()), energyBufferSize*energyBuffer.getElementSize()),
(int) longForceBuffer.getSize()*longForceBuffer.getElementSize()); (int) longForceBuffer.getSize()*longForceBuffer.getElementSize());
pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes); pinnedBuffer = new cl::Buffer(context, CL_MEM_ALLOC_HOST_PTR, bufferBytes);
pinnedMemory = currentQueue.enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes); pinnedMemory = getQueue().enqueueMapBuffer(*pinnedBuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, bufferBytes);
for (int i = 0; i < numAtoms; i++) { for (int i = 0; i < numAtoms; i++) {
double mass = system.getParticleMass(i); double mass = system.getParticleMass(i);
if (useDoublePrecision || useMixedPrecision) if (useDoublePrecision || useMixedPrecision)
...@@ -670,16 +671,12 @@ double& OpenCLContext::getEnergyWorkspace() { ...@@ -670,16 +671,12 @@ double& OpenCLContext::getEnergyWorkspace() {
return platformData.contextEnergy[contextIndex]; return platformData.contextEnergy[contextIndex];
} }
cl::CommandQueue& OpenCLContext::getQueue() { ComputeQueue OpenCLContext::createQueue() {
return currentQueue; return shared_ptr<ComputeQueueImpl>(new OpenCLQueue(cl::CommandQueue(context, device)));
} }
void OpenCLContext::setQueue(cl::CommandQueue& queue) { cl::CommandQueue OpenCLContext::getQueue() {
currentQueue = queue; return dynamic_cast<OpenCLQueue*>(currentQueue.get())->getQueue();
}
void OpenCLContext::restoreDefaultQueue() {
currentQueue = defaultQueue;
} }
OpenCLArray* OpenCLContext::createArray() { OpenCLArray* OpenCLContext::createArray() {
...@@ -714,13 +711,13 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi ...@@ -714,13 +711,13 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
try { try {
#ifdef ENABLE_PROFILING #ifdef ENABLE_PROFILING
cl::Event event; cl::Event event;
currentQueue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize), NULL, &event); getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize), NULL, &event);
profilingEvents.push_back(event); profilingEvents.push_back(event);
profilingKernelNames.push_back(kernel.getInfo<CL_KERNEL_FUNCTION_NAME>()); profilingKernelNames.push_back(kernel.getInfo<CL_KERNEL_FUNCTION_NAME>());
if (profilingEvents.size() >= 500) if (profilingEvents.size() >= 500)
printProfilingEvents(); printProfilingEvents();
#else #else
currentQueue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize)); getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(size), cl::NDRange(blockSize));
#endif #endif
} }
catch (cl::Error err) { catch (cl::Error err) {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2019 Stanford University and the Authors. * * Portions copyright (c) 2019-2025 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "OpenCLEvent.h" #include "OpenCLEvent.h"
#include "OpenCLQueue.h"
using namespace OpenMM; using namespace OpenMM;
...@@ -32,7 +33,7 @@ OpenCLEvent::OpenCLEvent(OpenCLContext& context) : context(context) { ...@@ -32,7 +33,7 @@ OpenCLEvent::OpenCLEvent(OpenCLContext& context) : context(context) {
} }
void OpenCLEvent::enqueue() { void OpenCLEvent::enqueue() {
context.getQueue().enqueueMarkerWithWaitList(NULL, &event); dynamic_cast<OpenCLQueue*>(context.getCurrentQueue().get())->getQueue().enqueueMarkerWithWaitList(NULL, &event);
} }
void OpenCLEvent::wait() { void OpenCLEvent::wait() {
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "OpenCLIntegrationUtilities.h" #include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h" #include "OpenCLNonbondedUtilities.h"
#include "OpenCLKernelSources.h" #include "OpenCLKernelSources.h"
#include "OpenCLQueue.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include <algorithm> #include <algorithm>
...@@ -222,18 +223,18 @@ private: ...@@ -222,18 +223,18 @@ private:
class OpenCLCalcNonbondedForceKernel::SyncQueuePreComputation : public OpenCLContext::ForcePreComputation { class OpenCLCalcNonbondedForceKernel::SyncQueuePreComputation : public OpenCLContext::ForcePreComputation {
public: public:
SyncQueuePreComputation(OpenCLContext& cl, cl::CommandQueue queue, int forceGroup) : cl(cl), queue(queue), forceGroup(forceGroup) { SyncQueuePreComputation(OpenCLContext& cl, ComputeQueue queue, int forceGroup) : cl(cl), queue(queue), forceGroup(forceGroup) {
} }
void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) { void computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) {
if ((groups&(1<<forceGroup)) != 0) { if ((groups&(1<<forceGroup)) != 0) {
vector<cl::Event> events(1); vector<cl::Event> events(1);
cl.getQueue().enqueueMarkerWithWaitList(NULL, &events[0]); cl.getQueue().enqueueMarkerWithWaitList(NULL, &events[0]);
queue.enqueueBarrierWithWaitList(&events); dynamic_cast<OpenCLQueue*>(queue.get())->getQueue().enqueueBarrierWithWaitList(&events);
} }
} }
private: private:
OpenCLContext& cl; OpenCLContext& cl;
cl::CommandQueue queue; ComputeQueue queue;
int forceGroup; int forceGroup;
}; };
...@@ -523,7 +524,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -523,7 +524,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
usePmeQueue = (!cl.getPlatformData().disablePmeStream && !cl.getPlatformData().useCpuPme && isNvidia); usePmeQueue = (!cl.getPlatformData().disablePmeStream && !cl.getPlatformData().useCpuPme && isNvidia);
if (usePmeQueue) { if (usePmeQueue) {
pmeDefines["USE_PME_STREAM"] = "1"; pmeDefines["USE_PME_STREAM"] = "1";
pmeQueue = cl::CommandQueue(cl.getContext(), cl.getDevice()); pmeQueue = cl.createQueue();
int recipForceGroup = force.getReciprocalSpaceForceGroup(); int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0) if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup(); recipForceGroup = force.getForceGroup();
...@@ -941,7 +942,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -941,7 +942,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (usePmeQueue) { if (usePmeQueue) {
vector<cl::Event> events(1); vector<cl::Event> events(1);
cl.getQueue().enqueueMarkerWithWaitList(NULL, &events[0]); cl.getQueue().enqueueMarkerWithWaitList(NULL, &events[0]);
pmeQueue.enqueueBarrierWithWaitList(&events); dynamic_cast<OpenCLQueue*>(pmeQueue.get())->getQueue().enqueueBarrierWithWaitList(&events);
} }
if (hasOffsets) { if (hasOffsets) {
// The Ewald self energy was computed in the kernel. // The Ewald self energy was computed in the kernel.
...@@ -979,7 +980,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -979,7 +980,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
} }
if (pmeGrid1.isInitialized() && includeReciprocal) { if (pmeGrid1.isInitialized() && includeReciprocal) {
if (usePmeQueue && !includeEnergy) if (usePmeQueue && !includeEnergy)
cl.setQueue(pmeQueue); cl.setCurrentQueue(pmeQueue);
// Invert the periodic box vectors. // Invert the periodic box vectors.
...@@ -1131,7 +1132,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1131,7 +1132,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeDispersionInterpolateForceKernel, cl.getNumAtoms()); cl.executeKernel(pmeDispersionInterpolateForceKernel, cl.getNumAtoms());
} }
if (usePmeQueue) { if (usePmeQueue) {
pmeQueue.enqueueMarkerWithWaitList(NULL, &pmeSyncEvent); dynamic_cast<OpenCLQueue*>(pmeQueue.get())->getQueue().enqueueMarkerWithWaitList(NULL, &pmeSyncEvent);
cl.restoreDefaultQueue(); cl.restoreDefaultQueue();
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment