Unverified Commit ecc2d258 authored by Anton Gorenko's avatar Anton Gorenko
Browse files

Port changes from the main repository

Use cuCtxPushCurrent() and cuCtxPopCurrent() for selecting CUDA context

    https://github.com/openmm/openmm/pull/3258

Fixed uninitialized memory access

    https://github.com/openmm/openmm/issues/3392
    https://github.com/openmm/openmm/pull/3399

Fixed potential invalid memory access

    See https://github.com/openmm/openmm/pull/3428

Improved temperature reporting for Drude particles

    https://github.com/openmm/openmm/pull/3486
    https://github.com/openmm/openmm/commit/a5e42f5

Fixed race condition with multiple GPUs

    https://github.com/openmm/openmm/commit/6fb1c8a41edff980862750bc086f6a204eb50941

Use blocking sync when creating events

    https://github.com/openmm/openmm/commit/fe21d5ee4f14673a4ea38b7244991772a64ceec2

Very minor optimizations

    https://github.com/openmm/openmm/commit/109f6b2535da4e0c0dd88007d6ca06b4add2ce81

Use PocketFFT

    https://github.com/openmm/openmm/commit/1dac981a63300a2a53a7925f570995914f7163ed

Improved logic for deciding when to reorder atoms

    https://github.com/openmm/openmm/commit/48664a1f1a4490a4dabc277757545ac070e7b898

Ensure valid atom order after loading a checkpoint

    https://github.com/openmm/openmm/commit/a056d5a3754e193105409afa12c9f0c9a2d972a2

Improve performance running on multiple GPUs

    https://github.com/openmm/openmm/commit/0c82c2647de98da5c6dab7bf7a7b8b19705aadc0

Fixed errors when running on multiple GPUs

    https://github.com/openmm/openmm/commit/ed9df876d43c037c08d4762721e73e5caae086d9

Optimized reducing energy

    https://github.com/openmm/openmm/commit/2975f44
parent f717ed89
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -56,7 +56,7 @@ public: ...@@ -56,7 +56,7 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
static HipArray* create(HipContext& context, int size, const std::string& name) { static HipArray* create(HipContext& context, size_t size, const std::string& name) {
return new HipArray(context, size, sizeof(T), name); return new HipArray(context, size, sizeof(T), name);
} }
/** /**
...@@ -72,7 +72,7 @@ public: ...@@ -72,7 +72,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
HipArray(HipContext& context, int size, int elementSize, const std::string& name); HipArray(HipContext& context, size_t size, int elementSize, const std::string& name);
~HipArray(); ~HipArray();
/** /**
* Initialize this object. * Initialize this object.
...@@ -82,7 +82,7 @@ public: ...@@ -82,7 +82,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name); void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/** /**
* Initialize this object. The template argument is the data type of each array element. * Initialize this object. The template argument is the data type of each array element.
* *
...@@ -91,13 +91,13 @@ public: ...@@ -91,13 +91,13 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) { void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name); initialize(context, size, sizeof(T), name);
} }
/** /**
* Recreate the internal storage to have a different size. * Recreate the internal storage to have a different size.
*/ */
void resize(int size); void resize(size_t size);
/** /**
* Get whether this array has been initialized. * Get whether this array has been initialized.
*/ */
...@@ -107,7 +107,7 @@ public: ...@@ -107,7 +107,7 @@ public:
/** /**
* Get the number of elements in the array. * Get the number of elements in the array.
*/ */
int getSize() const { size_t getSize() const {
return size; return size;
} }
/** /**
...@@ -183,7 +183,8 @@ public: ...@@ -183,7 +183,8 @@ public:
private: private:
HipContext* context; HipContext* context;
hipDeviceptr_t pointer; hipDeviceptr_t pointer;
int size, elementSize; size_t size;
int elementSize;
bool ownsMemory; bool ownsMemory;
std::string name; std::string name;
}; };
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <map> #include <map>
#include <stack>
#include <string> #include <string>
#include <utility> #include <utility>
#define __CL_ENABLE_EXCEPTIONS #define __CL_ENABLE_EXCEPTIONS
...@@ -99,10 +100,20 @@ public: ...@@ -99,10 +100,20 @@ public:
return contextIsValid; return contextIsValid;
} }
/** /**
* Set the hipCtx_t associated with this object to be the current context. If the context is not * Set the device associated with this object to be the current device. If the context is not
* valid, this returns without doing anything. * valid, this returns without doing anything.
*/ */
void setAsCurrent(); void setAsCurrent();
/**
* Push the device associated with this object to be the current device. If the context is not
* valid, this returns without doing anything.
*/
void pushAsCurrent();
/**
* Pop the device associated with this object off the stack of contexts. If the context is not
* valid, this returns without doing anything.
*/
void popAsCurrent();
/** /**
* Get the hipDevice_t associated with this object. * Get the hipDevice_t associated with this object.
*/ */
...@@ -582,6 +593,10 @@ public: ...@@ -582,6 +593,10 @@ public:
* expense of reduced simulation performance. * expense of reduced simulation performance.
*/ */
void flushQueue(); void flushQueue();
/**
* Get the flags that should be used when creating hipEvent_t objects.
*/
unsigned int getEventFlags();
/** /**
* Get the flags that should be used when allocating pinned host memory. * Get the flags that should be used when allocating pinned host memory.
*/ */
...@@ -610,6 +625,7 @@ private: ...@@ -610,6 +625,7 @@ private:
std::map<std::string, std::string> compilationDefines; std::map<std::string, std::string> compilationDefines;
std::vector<hipModule_t> loadedModules; std::vector<hipModule_t> loadedModules;
hipDevice_t device; hipDevice_t device;
std::stack<hipDevice_t> outerScopeDevices;
hipStream_t currentStream; hipStream_t currentStream;
hipFunction_t clearBufferKernel; hipFunction_t clearBufferKernel;
hipFunction_t clearTwoBuffersKernel; hipFunction_t clearTwoBuffersKernel;
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. * * Portions copyright (c) 2008-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -109,6 +109,18 @@ public: ...@@ -109,6 +109,18 @@ public:
* @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
*/ */
void setTime(ContextImpl& context, double time); void setTime(ContextImpl& context, double time);
/**
* Get the current step count
*
* @param context the context in which to execute this kernel
*/
long long getStepCount(const ContextImpl& context) const;
/**
* Set the current step count
*
* @param context the context in which to execute this kernel
*/
void setStepCount(const ContextImpl& context, long long count);
/** /**
* Get the positions of all particles. * Get the positions of all particles.
* *
...@@ -133,6 +145,15 @@ public: ...@@ -133,6 +145,15 @@ public:
* @param velocities a vector containg the particle velocities * @param velocities a vector containg the particle velocities
*/ */
void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities); void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities);
/**
* Compute velocities, shifted in time to account for a leapfrog integrator. The shift
* is based on the most recently computed forces.
*
* @param context the context in which to execute this kernel
* @param timeShift the amount by which to shift the velocities in time
* @param velocities the shifted velocities are returned in this
*/
void computeShiftedVelocities(ContextImpl& context, double timeShift, std::vector<Vec3>& velocities);
/** /**
* Get the current forces on all particles. * Get the current forces on all particles.
* *
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -339,7 +339,7 @@ private: ...@@ -339,7 +339,7 @@ private:
HipArray rebuildNeighborList; HipArray rebuildNeighborList;
HipSort* blockSorter; HipSort* blockSorter;
hipEvent_t downloadCountEvent; hipEvent_t downloadCountEvent;
int* pinnedCountBuffer; unsigned int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs; std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
...@@ -349,8 +349,9 @@ private: ...@@ -349,8 +349,9 @@ private:
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList; bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, numTilesInBatch, maxExclusions; int startTileIndex, startBlockIndex, numBlocks, numTilesInBatch, maxExclusions;
int numForceThreadBlocks, forceThreadBlockSize, findInteractingBlocksThreadBlockSize, numAtoms, groupFlags; int numForceThreadBlocks, forceThreadBlockSize, findInteractingBlocksThreadBlockSize, numAtoms, groupFlags;
unsigned int maxTiles, maxSinglePairs, tilesAfterReorder;
long long numTiles; long long numTiles;
std::string kernelSource; std::string kernelSource;
}; };
......
...@@ -91,7 +91,9 @@ private: ...@@ -91,7 +91,9 @@ private:
long long* pinnedForceBuffer; long long* pinnedForceBuffer;
hipFunction_t sumKernel; hipFunction_t sumKernel;
hipEvent_t event; hipEvent_t event;
hipStream_t peerCopyStream; std::vector<hipEvent_t> peerCopyEvent;
std::vector<hipEvent_t> peerCopyEventLocal;
std::vector<hipStream_t> peerCopyStream;
}; };
/** /**
......
...@@ -126,8 +126,8 @@ public: ...@@ -126,8 +126,8 @@ public:
std::vector<HipContext*> contexts; std::vector<HipContext*> contexts;
std::vector<double> contextEnergy; std::vector<double> contextEnergy;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces; bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces;
int cmMotionFrequency; int cmMotionFrequency, computeForceCount;
int stepCount, computeForceCount; long long stepCount;
double time; double time;
std::map<std::string, std::string> propertyValues; std::map<std::string, std::string> propertyValues;
ThreadPool threads; ThreadPool threads;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2012-2019 Stanford University and the Authors. * * Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipArray.h" #include "HipArray.h"
#include "HipContext.h" #include "HipContext.h"
#include "openmm/common/ContextSelector.h"
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
...@@ -36,13 +37,13 @@ using namespace OpenMM; ...@@ -36,13 +37,13 @@ using namespace OpenMM;
HipArray::HipArray() : pointer(0), ownsMemory(false) { HipArray::HipArray() : pointer(0), ownsMemory(false) {
} }
HipArray::HipArray(HipContext& context, int size, int elementSize, const std::string& name) : pointer(0) { HipArray::HipArray(HipContext& context, size_t size, int elementSize, const std::string& name) : pointer(0) {
initialize(context, size, elementSize, name); initialize(context, size, elementSize, name);
} }
HipArray::~HipArray() { HipArray::~HipArray() {
if (pointer != 0 && ownsMemory && context->getContextIsValid()) { if (pointer != 0 && ownsMemory && context->getContextIsValid()) {
context->setAsCurrent(); ContextSelector selector(*context);
hipError_t result = hipFree(pointer); hipError_t result = hipFree(pointer);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
...@@ -52,7 +53,7 @@ HipArray::~HipArray() { ...@@ -52,7 +53,7 @@ HipArray::~HipArray() {
} }
} }
void HipArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) { void HipArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (this->pointer != 0) if (this->pointer != 0)
throw OpenMMException("HipArray has already been initialized"); throw OpenMMException("HipArray has already been initialized");
this->context = &dynamic_cast<HipContext&>(context); this->context = &dynamic_cast<HipContext&>(context);
...@@ -60,6 +61,7 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co ...@@ -60,6 +61,7 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
this->elementSize = elementSize; this->elementSize = elementSize;
this->name = name; this->name = name;
ownsMemory = true; ownsMemory = true;
ContextSelector selector(*this->context);
hipError_t result = hipMalloc(&pointer, size*elementSize); hipError_t result = hipMalloc(&pointer, size*elementSize);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
...@@ -68,11 +70,12 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co ...@@ -68,11 +70,12 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
} }
} }
void HipArray::resize(int size) { void HipArray::resize(size_t size) {
if (pointer == 0) if (pointer == 0)
throw OpenMMException("HipArray has not been initialized"); throw OpenMMException("HipArray has not been initialized");
if (!ownsMemory) if (!ownsMemory)
throw OpenMMException("Cannot resize an array that does not own its storage"); throw OpenMMException("Cannot resize an array that does not own its storage");
ContextSelector selector(*context);
hipError_t result = hipFree(pointer); hipError_t result = hipFree(pointer);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2023 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2023 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "HipProgram.h" #include "HipProgram.h"
#include "HipFFT3D.h" #include "HipFFT3D.h"
#include "openmm/common/ComputeArray.h" #include "openmm/common/ComputeArray.h"
#include "openmm/common/ContextSelector.h"
#include "SHA1.h" #include "SHA1.h"
#include "openmm/Platform.h" #include "openmm/Platform.h"
#include "openmm/System.h" #include "openmm/System.h"
...@@ -184,13 +185,15 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy ...@@ -184,13 +185,15 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
} }
contextIsValid = true; contextIsValid = true;
ContextSelector selector(*this);
if (contextIndex > 0) { if (contextIndex > 0) {
int canAccess; int canAccess;
CHECK_RESULT(hipDeviceCanAccessPeer(&canAccess, getDevice(), platformData.contexts[0]->getDevice())); CHECK_RESULT(hipDeviceCanAccessPeer(&canAccess, getDevice(), platformData.contexts[0]->getDevice()));
if (canAccess) { if (canAccess) {
platformData.contexts[0]->setAsCurrent(); {
CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0)); ContextSelector selector2(*platformData.contexts[0]);
setAsCurrent(); CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0));
}
CHECK_RESULT(hipDeviceEnablePeerAccess(platformData.contexts[0]->getDevice(), 0)); CHECK_RESULT(hipDeviceEnablePeerAccess(platformData.contexts[0]->getDevice(), 0));
} }
} }
...@@ -345,7 +348,7 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy ...@@ -345,7 +348,7 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
} }
HipContext::~HipContext() { HipContext::~HipContext() {
setAsCurrent(); pushAsCurrent();
for (auto force : forces) for (auto force : forces)
delete force; delete force;
for (auto listener : reorderListeners) for (auto listener : reorderListeners)
...@@ -366,28 +369,29 @@ HipContext::~HipContext() { ...@@ -366,28 +369,29 @@ HipContext::~HipContext() {
delete nonbonded; delete nonbonded;
for (auto module : loadedModules) for (auto module : loadedModules)
hipModuleUnload(module); hipModuleUnload(module);
popAsCurrent();
contextIsValid = false; contextIsValid = false;
} }
void HipContext::initialize() { void HipContext::initialize() {
hipSetDevice(device); ContextSelector selector(*this);
string errorMessage = "Error initializing Context"; string errorMessage = "Error initializing Context";
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()); int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) { if (useDoublePrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum"); energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum"); energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
} }
else { else {
energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<float>(*this, 1, "energySum"); energySum.initialize<float>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags()));
} }
...@@ -423,6 +427,29 @@ void HipContext::setAsCurrent() { ...@@ -423,6 +427,29 @@ void HipContext::setAsCurrent() {
hipSetDevice(device); hipSetDevice(device);
} }
void HipContext::pushAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPushCurrent's behavior
hipDevice_t outerScopeDevice;
hipGetDevice(&outerScopeDevice);
outerScopeDevices.push(outerScopeDevice);
if (device != outerScopeDevice) {
hipSetDevice(device);
}
}
}
void HipContext::popAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPopCurrent's behavior
hipDevice_t outerScopeDevice = outerScopeDevices.top();
outerScopeDevices.pop();
if (outerScopeDevice != device) {
hipSetDevice(outerScopeDevice);
}
}
}
string HipContext::getTempFileName() const { string HipContext::getTempFileName() const {
stringstream tempFileName; stringstream tempFileName;
tempFileName << tempDir; tempFileName << tempDir;
...@@ -784,12 +811,18 @@ double HipContext::reduceEnergy() { ...@@ -784,12 +811,18 @@ double HipContext::reduceEnergy() {
int bufferSize = energyBuffer.getSize(); int bufferSize = energyBuffer.getSize();
int workGroupSize = getMaxThreadBlockSize(); int workGroupSize = getMaxThreadBlockSize();
void* args[] = {&energyBuffer.getDevicePointer(), &energySum.getDevicePointer(), &bufferSize, &workGroupSize}; void* args[] = {&energyBuffer.getDevicePointer(), &energySum.getDevicePointer(), &bufferSize, &workGroupSize};
executeKernel(reduceEnergyKernel, args, workGroupSize, workGroupSize, workGroupSize*energyBuffer.getElementSize()); executeKernel(reduceEnergyKernel, args, workGroupSize*energySum.getSize(), workGroupSize, workGroupSize*energyBuffer.getElementSize());
energySum.download(pinnedBuffer); energySum.download(pinnedBuffer);
if (getUseDoublePrecision() || getUseMixedPrecision()) double result = 0;
return *((double*) pinnedBuffer); if (getUseDoublePrecision() || getUseMixedPrecision()) {
else for (int i = 0; i < energySum.getSize(); i++)
return *((float*) pinnedBuffer); result += ((double*) pinnedBuffer)[i];
}
else {
for (int i = 0; i < energySum.getSize(); i++)
result += ((float*) pinnedBuffer)[i];
}
return result;
} }
void HipContext::setCharges(const vector<double>& charges) { void HipContext::setCharges(const vector<double>& charges) {
...@@ -850,6 +883,13 @@ vector<int> HipContext::getDevicePrecedence() { ...@@ -850,6 +883,13 @@ vector<int> HipContext::getDevicePrecedence() {
return precedence; return precedence;
} }
unsigned int HipContext::getEventFlags() {
unsigned int flags = hipEventDisableTiming;
if (useBlockingSync)
flags += hipEventBlockingSync;
return flags;
}
unsigned int HipContext::getHostMallocFlags() { unsigned int HipContext::getHostMallocFlags() {
#ifdef WIN32 #ifdef WIN32
return hipHostMallocDefault; return hipHostMallocDefault;
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
using namespace OpenMM; using namespace OpenMM;
HipEvent::HipEvent(HipContext& context) : context(context), eventCreated(false) { HipEvent::HipEvent(HipContext& context) : context(context), eventCreated(false) {
hipError_t result = hipEventCreateWithFlags(&event, hipEventDisableTiming); hipError_t result = hipEventCreateWithFlags(&event, context.getEventFlags());
if (result != hipSuccess) if (result != hipSuccess)
throw OpenMMException("Error creating HIP event:"+HipContext::getErrorString(result)); throw OpenMMException("Error creating HIP event:"+HipContext::getErrorString(result));
eventCreated = true; eventCreated = true;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipIntegrationUtilities.h" #include "HipIntegrationUtilities.h"
#include "HipContext.h" #include "HipContext.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -41,13 +42,13 @@ using namespace std; ...@@ -41,13 +42,13 @@ using namespace std;
HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system), HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) { ccmaConvergedMemory(NULL) {
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, hipEventDisableTiming), "Error creating event for CCMA"); CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, context.getEventFlags()), "Error creating event for CCMA");
CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory"); CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory");
CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory"); CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
} }
HipIntegrationUtilities::~HipIntegrationUtilities() { HipIntegrationUtilities::~HipIntegrationUtilities() {
context.setAsCurrent(); ContextSelector selector(context);
if (ccmaConvergedMemory != NULL) { if (ccmaConvergedMemory != NULL) {
hipHostFree(ccmaConvergedMemory); hipHostFree(ccmaConvergedMemory);
hipEventDestroy(ccmaEvent); hipEventDestroy(ccmaEvent);
...@@ -67,6 +68,7 @@ HipArray& HipIntegrationUtilities::getStepSize() { ...@@ -67,6 +68,7 @@ HipArray& HipIntegrationUtilities::getStepSize() {
} }
void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) { void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) {
ContextSelector selector(context);
ComputeKernel settleKernel, shakeKernel, ccmaForceKernel; ComputeKernel settleKernel, shakeKernel, ccmaForceKernel;
if (constrainVelocities) { if (constrainVelocities) {
settleKernel = settleVelKernel; settleKernel = settleVelKernel;
...@@ -132,6 +134,7 @@ void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, dou ...@@ -132,6 +134,7 @@ void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, dou
} }
void HipIntegrationUtilities::distributeForcesFromVirtualSites() { void HipIntegrationUtilities::distributeForcesFromVirtualSites() {
ContextSelector selector(context);
if (numVsites > 0) { if (numVsites > 0) {
vsiteForceKernel->setArg(2, context.getLongForceBuffer()); vsiteForceKernel->setArg(2, context.getLongForceBuffer());
vsiteForceKernel->execute(numVsites); vsiteForceKernel->execute(numVsites);
......
This diff is collapsed.
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -439,6 +439,10 @@ void HipNonbondedUtilities::computeInteractions(int forceGroups, bool includeFor ...@@ -439,6 +439,10 @@ void HipNonbondedUtilities::computeInteractions(int forceGroups, bool includeFor
bool HipNonbondedUtilities::updateNeighborListSize() { bool HipNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return false; return false;
if (context.getStepsSinceReorder() == 0)
tilesAfterReorder = pinnedCountBuffer[0];
else if (context.getStepsSinceReorder() > 25 && pinnedCountBuffer[0] > 1.1*tilesAfterReorder)
context.forceReorder();
if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs) if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs)
return false; return false;
...@@ -446,12 +450,13 @@ bool HipNonbondedUtilities::updateNeighborListSize() { ...@@ -446,12 +450,13 @@ bool HipNonbondedUtilities::updateNeighborListSize() {
// this from happening in the future. // this from happening in the future.
if (pinnedCountBuffer[0] > maxTiles) { if (pinnedCountBuffer[0] > maxTiles) {
maxTiles = (int) (1.2*pinnedCountBuffer[0]); maxTiles = (unsigned int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
maxTiles = totalTiles; maxTiles = totalTiles;
interactingTiles.resize(maxTiles); interactingTiles.resize(maxTiles);
interactingAtoms.resize(HipContext::TileSize*maxTiles); interactingAtoms.resize(HipContext::TileSize*(size_t) maxTiles);
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles.getDevicePointer(); forceArgs[7] = &interactingTiles.getDevicePointer();
findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer(); findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer();
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2019 Stanford University and the Authors. * * Portions copyright (c) 2011-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipParallelKernels.h" #include "HipParallelKernels.h"
#include "HipKernelSources.h" #include "HipKernelSources.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -70,7 +71,7 @@ public: ...@@ -70,7 +71,7 @@ public:
void execute() { void execute() {
// Copy coordinates over to this device and execute the kernel. // Copy coordinates over to this device and execute the kernel.
cu.setAsCurrent(); ContextSelector selector(cu);
if (cu.getContextIndex() > 0) { if (cu.getContextIndex() > 0) {
hipStreamWaitEvent(cu.getCurrentStream(), event, 0); hipStreamWaitEvent(cu.getCurrentStream(), event, 0);
if (!cu.getPlatformData().peerAccessSupported) if (!cu.getPlatformData().peerAccessSupported)
...@@ -94,13 +95,16 @@ private: ...@@ -94,13 +95,16 @@ private:
class HipParallelCalcForcesAndEnergyKernel::FinishComputationTask : public HipContext::WorkTask { class HipParallelCalcForcesAndEnergyKernel::FinishComputationTask : public HipContext::WorkTask {
public: public:
FinishComputationTask(ContextImpl& context, HipContext& cu, HipCalcForcesAndEnergyKernel& kernel, FinishComputationTask(ContextImpl& context, HipContext& cu, HipCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces, bool& valid, int2& interactionCount) : bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces,
bool& valid, int2& interactionCount, hipStream_t stream, hipEvent_t event, hipEvent_t localEvent) :
context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy), context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount) { completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount),
stream(stream), event(event), localEvent(localEvent) {
} }
void execute() { void execute() {
// Execute the kernel, then download forces. // Execute the kernel, then download forces.
ContextSelector selector(cu);
energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid); energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid);
if (cu.getComputeForceCount() < 200) { if (cu.getComputeForceCount() < 200) {
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation. // Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
...@@ -110,13 +114,16 @@ public: ...@@ -110,13 +114,16 @@ public:
} }
if (includeForce) { if (includeForce) {
if (cu.getContextIndex() > 0) { if (cu.getContextIndex() > 0) {
hipEventRecord(localEvent, cu.getCurrentStream());
hipStreamWaitEvent(stream, localEvent, 0);
int numAtoms = cu.getPaddedNumAtoms(); int numAtoms = cu.getPaddedNumAtoms();
if (cu.getPlatformData().peerAccessSupported) { if (cu.getPlatformData().peerAccessSupported) {
int numBytes = numAtoms*3*sizeof(long long); int numBytes = numAtoms*3*sizeof(long long);
int offset = (cu.getContextIndex()-1)*numBytes; int offset = (cu.getContextIndex()-1)*numBytes;
HipContext& context0 = *cu.getPlatformData().contexts[0]; HipContext& context0 = *cu.getPlatformData().contexts[0];
CHECK_RESULT(hipMemcpy(static_cast<char*>(contextForces.getDevicePointer())+offset, CHECK_RESULT(hipMemcpyAsync(static_cast<char*>(contextForces.getDevicePointer())+offset,
cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice), "Error copying forces"); cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice, stream), "Error copying forces");
hipEventRecord(event, stream);
} }
else else
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]); cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
...@@ -140,6 +147,9 @@ private: ...@@ -140,6 +147,9 @@ private:
HipArray& contextForces; HipArray& contextForces;
bool& valid; bool& valid;
int2& interactionCount; int2& interactionCount;
hipStream_t stream;
hipEvent_t event;
hipEvent_t localEvent;
}; };
HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, HipPlatform::PlatformData& data) : HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, HipPlatform::PlatformData& data) :
...@@ -150,20 +160,25 @@ HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(strin ...@@ -150,20 +160,25 @@ HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(strin
} }
HipParallelCalcForcesAndEnergyKernel::~HipParallelCalcForcesAndEnergyKernel() { HipParallelCalcForcesAndEnergyKernel::~HipParallelCalcForcesAndEnergyKernel() {
data.contexts[0]->setAsCurrent(); ContextSelector selector(*data.contexts[0]);
if (pinnedPositionBuffer != NULL) if (pinnedPositionBuffer != NULL)
hipHostFree(pinnedPositionBuffer); hipHostFree(pinnedPositionBuffer);
if (pinnedForceBuffer != NULL) if (pinnedForceBuffer != NULL)
hipHostFree(pinnedForceBuffer); hipHostFree(pinnedForceBuffer);
hipEventDestroy(event); hipEventDestroy(event);
hipStreamDestroy(peerCopyStream); for (int i = 0; i < peerCopyEvent.size(); i++)
hipEventDestroy(peerCopyEvent[i]);
for (int i = 0; i < peerCopyEventLocal.size(); i++)
hipEventDestroy(peerCopyEventLocal[i]);
for (int i = 0; i < peerCopyStream.size(); i++)
hipStreamDestroy(peerCopyStream[i]);
if (interactionCounts != NULL) if (interactionCounts != NULL)
hipHostFree(interactionCounts); hipHostFree(interactionCounts);
} }
void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) { void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
HipContext& cu = *data.contexts[0]; HipContext& cu = *data.contexts[0];
cu.setAsCurrent(); ContextSelector selector(cu);
hipModule_t module = cu.createModule(HipKernelSources::parallel); hipModule_t module = cu.createModule(HipKernelSources::parallel);
sumKernel = cu.getKernel(module, "sumForces"); sumKernel = cu.getKernel(module, "sumForces");
int numContexts = data.contexts.size(); int numContexts = data.contexts.size();
...@@ -171,14 +186,25 @@ void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -171,14 +186,25 @@ void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
getKernel(i).initialize(system); getKernel(i).initialize(system);
for (int i = 0; i < numContexts; i++) for (int i = 0; i < numContexts; i++)
contextNonbondedFractions[i] = 1/(double) numContexts; contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(hipEventCreateWithFlags(&event, 0), "Error creating event"); CHECK_RESULT(hipEventCreateWithFlags(&event, cu.getEventFlags()), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream, hipStreamNonBlocking), "Error creating stream"); peerCopyEvent.resize(numContexts);
peerCopyEventLocal.resize(numContexts);
peerCopyStream.resize(numContexts);
for (int i = 0; i < numContexts; i++) {
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEvent[i], cu.getEventFlags()), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream[i], hipStreamNonBlocking), "Error creating stream");
}
for (int i = 0; i < numContexts; i++) {
HipContext& cuLocal = *data.contexts[i];
ContextSelector selectorLocal(cuLocal);
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEventLocal[i], cu.getEventFlags()), "Error creating event");
}
CHECK_RESULT(hipHostMalloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer"); CHECK_RESULT(hipHostMalloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer");
} }
void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
HipContext& cu = *data.contexts[0]; HipContext& cu = *data.contexts[0];
cu.setAsCurrent(); ContextSelector selector(cu);
if (!contextForces.isInitialized()) { if (!contextForces.isInitialized()) {
contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces"); contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
CHECK_RESULT(hipHostMalloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), hipHostMallocPortable), "Error allocating pinned memory"); CHECK_RESULT(hipHostMalloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), hipHostMallocPortable), "Error allocating pinned memory");
...@@ -194,36 +220,44 @@ void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context ...@@ -194,36 +220,44 @@ void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context
else { else {
int numBytes = cu.getPosq().getSize()*cu.getPosq().getElementSize(); int numBytes = cu.getPosq().getSize()*cu.getPosq().getElementSize();
hipEventRecord(event, cu.getCurrentStream()); hipEventRecord(event, cu.getCurrentStream());
hipStreamWaitEvent(peerCopyStream, event, 0); for (int i = 1; i < (int) data.contexts.size(); i++) {
for (int i = 1; i < (int) data.contexts.size(); i++) hipStreamWaitEvent(peerCopyStream[i], event, 0);
CHECK_RESULT(hipMemcpyAsync( CHECK_RESULT(hipMemcpyAsync(
data.contexts[i]->getPosq().getDevicePointer(), data.contexts[i]->getPosq().getDevicePointer(),
cu.getPosq().getDevicePointer(), numBytes, cu.getPosq().getDevicePointer(), numBytes,
hipMemcpyDeviceToDevice, peerCopyStream), "Error copying positions"); hipMemcpyDeviceToDevice, peerCopyStream[i]), "Error copying positions");
hipEventRecord(event, peerCopyStream); hipEventRecord(peerCopyEvent[i], peerCopyStream[i]);
}
} }
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0; data.contextEnergy[i] = 0.0;
HipContext& cu = *data.contexts[i]; HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread(); ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event, interactionCounts[i])); hipEvent_t waitEvent = (cu.getPlatformData().peerAccessSupported ? peerCopyEvent[i] : event);
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, waitEvent, interactionCounts[i]));
} }
data.syncContexts();
} }
double HipParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) { double HipParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
HipContext& cu = *data.contexts[i]; HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread(); ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, contextForces, valid, interactionCounts[i])); thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i],
pinnedForceBuffer, contextForces, valid, interactionCounts[i], peerCopyStream[i], peerCopyEvent[i], peerCopyEventLocal[i]));
} }
data.syncContexts(); data.syncContexts();
HipContext& cu = *data.contexts[0];
ContextSelector selector(cu);
if (cu.getPlatformData().peerAccessSupported)
for (int i = 1; i < data.contexts.size(); i++)
hipStreamWaitEvent(cu.getCurrentStream(), peerCopyEvent[i], 0);
double energy = 0.0; double energy = 0.0;
for (int i = 0; i < (int) data.contextEnergy.size(); i++) for (int i = 0; i < (int) data.contextEnergy.size(); i++)
energy += data.contextEnergy[i]; energy += data.contextEnergy[i];
if (includeForce && valid) { if (includeForce && valid) {
// Sum the forces from all devices. // Sum the forces from all devices.
HipContext& cu = *data.contexts[0];
if (!cu.getPlatformData().peerAccessSupported) if (!cu.getPlatformData().peerAccessSupported)
contextForces.upload(pinnedForceBuffer, false); contextForces.upload(pinnedForceBuffer, false);
int bufferSize = 3*cu.getPaddedNumAtoms(); int bufferSize = 3*cu.getPaddedNumAtoms();
......
...@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re ...@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
extern __shared__ mixed tempBuffer[]; extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x; const unsigned int thread = threadIdx.x;
mixed sum = 0; mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x) for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x)
sum += energyBuffer[index]; sum += energyBuffer[index];
tempBuffer[thread] = sum; tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) { for (int i = 1; i < workGroupSize; i *= 2) {
...@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re ...@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
*result = tempBuffer[0]; result[blockIdx.x] = tempBuffer[0];
} }
/** /**
......
...@@ -41,4 +41,6 @@ OpenMM::HipPlatform platform; ...@@ -41,4 +41,6 @@ OpenMM::HipPlatform platform;
void initializeTests(int argc, char* argv[]) { void initializeTests(int argc, char* argv[]) {
if (argc > 1) if (argc > 1)
platform.setPropertyDefaultValue("Precision", std::string(argv[1])); platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
if (argc > 2)
platform.setPropertyDefaultValue("DeviceIndex", std::string(argv[2]));
} }
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -38,12 +38,16 @@ ...@@ -38,12 +38,16 @@
#include "HipArray.h" #include "HipArray.h"
#include "HipContext.h" #include "HipContext.h"
#include "HipFFT3D.h" #include "HipFFT3D.h"
#include "fftpack.h"
#include "sfmt/SFMT.h" #include "sfmt/SFMT.h"
#include "openmm/System.h" #include "openmm/System.h"
#include <complex>
#include <iostream> #include <iostream>
#include <cmath> #include <cmath>
#include <set> #include <set>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -67,19 +71,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e ...@@ -67,19 +71,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
vector<Real2> original(xsize*ysize*zsize); vector<Real2> original(xsize*ysize*zsize);
vector<t_complex> reference(original.size()); vector<complex<double>> reference(original.size());
for (int i = 0; i < (int) original.size(); i++) { for (int i = 0; i < (int) original.size(); i++) {
Real2 value; Real2 value;
value.x = (float) genrand_real2(sfmt); value.x = (float) genrand_real2(sfmt);
value.y = (float) genrand_real2(sfmt); value.y = (float) genrand_real2(sfmt);
original[i] = value; original[i] = value;
reference[i] = t_complex(value.x, value.y); reference[i] = complex<double>(value.x, value.y);
} }
for (int i = 0; i < (int) reference.size(); i++) { for (int i = 0; i < (int) reference.size(); i++) {
if (realToComplex) if (realToComplex)
reference[i] = t_complex(i%2 == 0 ? original[i/2].x : original[i/2].y, 0); reference[i] = complex<double>(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
else else
reference[i] = t_complex(original[i].x, original[i].y); reference[i] = complex<double>(original[i].x, original[i].y);
} }
HipArray grid1(context, original.size(), sizeof(Real2), "grid1"); HipArray grid1(context, original.size(), sizeof(Real2), "grid1");
HipArray grid2(context, original.size(), sizeof(Real2), "grid2"); HipArray grid2(context, original.size(), sizeof(Real2), "grid2");
...@@ -91,19 +95,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e ...@@ -91,19 +95,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
fft.execFFT(true); fft.execFFT(true);
vector<Real2> result; vector<Real2> result;
grid2.download(result); grid2.download(result);
fftpack_t plan; vector<size_t> shape = {(size_t) xsize, (size_t) ysize, (size_t) zsize};
fftpack_init_3d(&plan, xsize, ysize, zsize); vector<size_t> axes = {0, 1, 2};
fftpack_exec_3d(plan, FFTPACK_FORWARD, &reference[0], &reference[0]); vector<ptrdiff_t> stride = {(ptrdiff_t) (ysize*zsize*sizeof(complex<double>)),
(ptrdiff_t) (zsize*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, reference.data(), reference.data(), 1.0);
int outputZSize = (realToComplex ? zsize/2+1 : zsize); int outputZSize = (realToComplex ? zsize/2+1 : zsize);
for (int x = 0; x < xsize; x++) for (int x = 0; x < xsize; x++)
for (int y = 0; y < ysize; y++) for (int y = 0; y < ysize; y++)
for (int z = 0; z < outputZSize; z++) { for (int z = 0; z < outputZSize; z++) {
int index1 = x*ysize*zsize + y*zsize + z; int index1 = x*ysize*zsize + y*zsize + z;
int index2 = x*ysize*outputZSize + y*outputZSize + z; int index2 = x*ysize*outputZSize + y*outputZSize + z;
ASSERT_EQUAL_TOL(reference[index1].re, result[index2].x, 1e-3 * eps); ASSERT_EQUAL_TOL(reference[index1].real(), result[index2].x, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].im, result[index2].y, 1e-3 * eps); ASSERT_EQUAL_TOL(reference[index1].imag(), result[index2].y, 1e-3 * eps);
} }
fftpack_destroy(plan);
// Perform a backward transform and see if we get the original values. // Perform a backward transform and see if we get the original values.
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. * * Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
...@@ -50,13 +50,19 @@ void testParallelComputation(NonbondedForce::NonbondedMethod method) { ...@@ -50,13 +50,19 @@ void testParallelComputation(NonbondedForce::NonbondedMethod method) {
vector<Vec3> positions(numParticles); vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt)); positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt));
force->addGlobalParameter("scale", 0.5);
for (int i = 0; i < numParticles; ++i) for (int i = 0; i < numParticles; ++i)
for (int j = 0; j < i; ++j) { for (int j = 0; j < i; ++j) {
Vec3 delta = positions[i]-positions[j]; Vec3 delta = positions[i]-positions[j];
if (delta.dot(delta) < 0.1) if (delta.dot(delta) < 0.1) {
force->addException(i, j, 0, 1, 0); force->addException(i, j, 0, 1, 0);
}
else if (delta.dot(delta) < 0.2) {
int index = force->addException(i, j, 0.5, 1, 1.0);
force->addExceptionParameterOffset("scale", index, 0.5, 0.4, 0.3);
}
} }
// Create two contexts, one with a single device and one with two devices. // Create two contexts, one with a single device and one with two devices.
VerletIntegrator integrator1(0.01); VerletIntegrator integrator1(0.01);
...@@ -179,6 +185,7 @@ void runPlatformTests() { ...@@ -179,6 +185,7 @@ void runPlatformTests() {
testParallelComputation(NonbondedForce::NoCutoff); testParallelComputation(NonbondedForce::NoCutoff);
testParallelComputation(NonbondedForce::Ewald); testParallelComputation(NonbondedForce::Ewald);
testParallelComputation(NonbondedForce::PME); testParallelComputation(NonbondedForce::PME);
testParallelComputation(NonbondedForce::LJPME);
testReordering(); testReordering();
testDeterministicForces(); testDeterministicForces();
if (canRunHugeTest()) if (canRunHugeTest())
......
...@@ -60,6 +60,7 @@ void testGaussian() { ...@@ -60,6 +60,7 @@ void testGaussian() {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.setAsCurrent();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
HipArray& random = context.getIntegrationUtilities().getRandom(); HipArray& random = context.getIntegrationUtilities().getRandom();
context.getIntegrationUtilities().prepareRandomNumbers(random.getSize()); context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
......
...@@ -70,6 +70,7 @@ void verifySorting(vector<float> array, bool uniform) { ...@@ -70,6 +70,7 @@ void verifySorting(vector<float> array, bool uniform) {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.setAsCurrent();
HipArray data(context, array.size(), 4, "sortData"); HipArray data(context, array.size(), 4, "sortData");
data.upload(array); data.upload(array);
HipSort sort(context, new SortTrait(), array.size(), uniform); HipSort sort(context, new SortTrait(), array.size(), uniform);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2020 Stanford University and the Authors. * * Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2021 Advanced Micro Devices, Inc. * * Portions copyright (c) 2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Mark Friedrichs * * Authors: Peter Eastman, Mark Friedrichs *
* Contributors: * * Contributors: *
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define _USE_MATH_DEFINES // Needed to get M_PI #define _USE_MATH_DEFINES // Needed to get M_PI
#endif #endif
#include "AmoebaHipKernels.h" #include "AmoebaHipKernels.h"
#include "openmm/common/ContextSelector.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h" #include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h" #include "openmm/internal/AmoebaMultipoleForceImpl.h"
...@@ -56,7 +57,7 @@ using namespace std; ...@@ -56,7 +57,7 @@ using namespace std;
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() { HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
cc.setAsCurrent(); ContextSelector selector(cc);
if (fft != NULL) if (fft != NULL)
delete fft; delete fft;
} }
...@@ -64,6 +65,7 @@ HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() { ...@@ -64,6 +65,7 @@ HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
void HipCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) { void HipCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) {
CommonCalcAmoebaMultipoleForceKernel::initialize(system, force); CommonCalcAmoebaMultipoleForceKernel::initialize(system, force);
if (usePME) { if (usePME) {
ContextSelector selector(cc);
HipArray& grid1 = cu.unwrap(pmeGrid1); HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2); HipArray& grid2 = cu.unwrap(pmeGrid2);
fft = cu.createFFT(gridSizeX, gridSizeY, gridSizeZ, false, cu.getCurrentStream(), grid1, grid2); fft = cu.createFFT(gridSizeX, gridSizeY, gridSizeZ, false, cu.getCurrentStream(), grid1, grid2);
...@@ -79,7 +81,7 @@ void HipCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) { ...@@ -79,7 +81,7 @@ void HipCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) {
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() { HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
cc.setAsCurrent(); ContextSelector selector(cc);
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -91,6 +93,7 @@ HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() { ...@@ -91,6 +93,7 @@ HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
void HipCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) { void HipCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) {
CommonCalcHippoNonbondedForceKernel::initialize(system, force); CommonCalcHippoNonbondedForceKernel::initialize(system, force);
if (usePME) { if (usePME) {
ContextSelector selector(cc);
sort = new HipSort(cu, new SortTrait(), cc.getNumAtoms()); sort = new HipSort(cu, new SortTrait(), cc.getNumAtoms());
HipArray& grid1 = cu.unwrap(pmeGrid1); HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2); HipArray& grid2 = cu.unwrap(pmeGrid2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment