"vscode:/vscode.git/clone" did not exist on "b7fef496ae991819ddbbc304e37e50d28420adad"
Unverified commit ecc2d258, authored by Anton Gorenko
Browse files

Port changes from the main repository

Use cuCtxPushCurrent() and cuCtxPopCurrent() for selecting CUDA context

    https://github.com/openmm/openmm/pull/3258

Fixed uninitialized memory access

    https://github.com/openmm/openmm/issues/3392
    https://github.com/openmm/openmm/pull/3399

Fixed potential invalid memory access

    See https://github.com/openmm/openmm/pull/3428

Improved temperature reporting for Drude particles

    https://github.com/openmm/openmm/pull/3486
    https://github.com/openmm/openmm/commit/a5e42f5

Fixed race condition with multiple GPUs

    https://github.com/openmm/openmm/commit/6fb1c8a41edff980862750bc086f6a204eb50941

Use blocking sync when creating events

    https://github.com/openmm/openmm/commit/fe21d5ee4f14673a4ea38b7244991772a64ceec2

Very minor optimizations

    https://github.com/openmm/openmm/commit/109f6b2535da4e0c0dd88007d6ca06b4add2ce81

Use PocketFFT

    https://github.com/openmm/openmm/commit/1dac981a63300a2a53a7925f570995914f7163ed

Improved logic for deciding when to reorder atoms

    https://github.com/openmm/openmm/commit/48664a1f1a4490a4dabc277757545ac070e7b898

Ensure valid atom order after loading a checkpoint

    https://github.com/openmm/openmm/commit/a056d5a3754e193105409afa12c9f0c9a2d972a2

Improve performance running on multiple GPUs

    https://github.com/openmm/openmm/commit/0c82c2647de98da5c6dab7bf7a7b8b19705aadc0

Fixed errors when running on multiple GPUs

    https://github.com/openmm/openmm/commit/ed9df876d43c037c08d4762721e73e5caae086d9

Optimized reducing energy

    https://github.com/openmm/openmm/commit/2975f44
parent f717ed89
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -56,7 +56,7 @@ public: ...@@ -56,7 +56,7 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
static HipArray* create(HipContext& context, int size, const std::string& name) { static HipArray* create(HipContext& context, size_t size, const std::string& name) {
return new HipArray(context, size, sizeof(T), name); return new HipArray(context, size, sizeof(T), name);
} }
/** /**
...@@ -72,7 +72,7 @@ public: ...@@ -72,7 +72,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
HipArray(HipContext& context, int size, int elementSize, const std::string& name); HipArray(HipContext& context, size_t size, int elementSize, const std::string& name);
~HipArray(); ~HipArray();
/** /**
* Initialize this object. * Initialize this object.
...@@ -82,7 +82,7 @@ public: ...@@ -82,7 +82,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name); void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/** /**
* Initialize this object. The template argument is the data type of each array element. * Initialize this object. The template argument is the data type of each array element.
* *
...@@ -91,13 +91,13 @@ public: ...@@ -91,13 +91,13 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) { void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name); initialize(context, size, sizeof(T), name);
} }
/** /**
* Recreate the internal storage to have a different size. * Recreate the internal storage to have a different size.
*/ */
void resize(int size); void resize(size_t size);
/** /**
* Get whether this array has been initialized. * Get whether this array has been initialized.
*/ */
...@@ -107,7 +107,7 @@ public: ...@@ -107,7 +107,7 @@ public:
/** /**
* Get the number of elements in the array. * Get the number of elements in the array.
*/ */
int getSize() const { size_t getSize() const {
return size; return size;
} }
/** /**
...@@ -183,7 +183,8 @@ public: ...@@ -183,7 +183,8 @@ public:
private: private:
HipContext* context; HipContext* context;
hipDeviceptr_t pointer; hipDeviceptr_t pointer;
int size, elementSize; size_t size;
int elementSize;
bool ownsMemory; bool ownsMemory;
std::string name; std::string name;
}; };
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <map> #include <map>
#include <stack>
#include <string> #include <string>
#include <utility> #include <utility>
#define __CL_ENABLE_EXCEPTIONS #define __CL_ENABLE_EXCEPTIONS
...@@ -99,10 +100,20 @@ public: ...@@ -99,10 +100,20 @@ public:
return contextIsValid; return contextIsValid;
} }
/** /**
* Set the hipCtx_t associated with this object to be the current context. If the context is not * Set the device associated with this object to be the current device. If the context is not
* valid, this returns without doing anything. * valid, this returns without doing anything.
*/ */
void setAsCurrent(); void setAsCurrent();
/**
* Make the device associated with this object the current device, first saving the device that
* was current on an internal stack so that popAsCurrent() can restore it later. If the context
* is not valid, this returns without doing anything.
*/
void pushAsCurrent();
/**
* Undo the most recent pushAsCurrent(): remove the saved device from the top of the internal stack
* and, if it differs from this object's device, make it the current device again. If the context
* is not valid, this returns without doing anything.
*/
void popAsCurrent();
/** /**
* Get the hipDevice_t associated with this object. * Get the hipDevice_t associated with this object.
*/ */
...@@ -582,6 +593,10 @@ public: ...@@ -582,6 +593,10 @@ public:
* expense of reduced simulation performance. * expense of reduced simulation performance.
*/ */
void flushQueue(); void flushQueue();
/**
* Get the flags that should be used when creating hipEvent_t objects. The result always
* contains hipEventDisableTiming, and additionally hipEventBlockingSync when this context
* was configured to use blocking sync.
*
* @return the flags to pass to hipEventCreateWithFlags()
*/
unsigned int getEventFlags();
/** /**
* Get the flags that should be used when allocating pinned host memory. * Get the flags that should be used when allocating pinned host memory.
*/ */
...@@ -610,6 +625,7 @@ private: ...@@ -610,6 +625,7 @@ private:
std::map<std::string, std::string> compilationDefines; std::map<std::string, std::string> compilationDefines;
std::vector<hipModule_t> loadedModules; std::vector<hipModule_t> loadedModules;
hipDevice_t device; hipDevice_t device;
std::stack<hipDevice_t> outerScopeDevices;
hipStream_t currentStream; hipStream_t currentStream;
hipFunction_t clearBufferKernel; hipFunction_t clearBufferKernel;
hipFunction_t clearTwoBuffersKernel; hipFunction_t clearTwoBuffersKernel;
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. * * Portions copyright (c) 2008-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -109,6 +109,18 @@ public: ...@@ -109,6 +109,18 @@ public:
* @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
*/ */
void setTime(ContextImpl& context, double time); void setTime(ContextImpl& context, double time);
/**
* Get the current step count
*
* @param context the context in which to execute this kernel
* @return the current step count
*/
long long getStepCount(const ContextImpl& context) const;
/**
* Set the current step count
*
* @param context the context in which to execute this kernel
* @param count the new step count
*/
void setStepCount(const ContextImpl& context, long long count);
/** /**
* Get the positions of all particles. * Get the positions of all particles.
* *
...@@ -133,6 +145,15 @@ public: ...@@ -133,6 +145,15 @@ public:
* @param velocities a vector containing the particle velocities * @param velocities a vector containing the particle velocities
*/ */
void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities); void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities);
/**
* Compute velocities, shifted in time to account for a leapfrog integrator. The shift
* is based on the most recently computed forces.
*
* @param context the context in which to execute this kernel
* @param timeShift the amount by which to shift the velocities in time
* @param velocities the shifted velocities are returned in this
*/
void computeShiftedVelocities(ContextImpl& context, double timeShift, std::vector<Vec3>& velocities);
/** /**
* Get the current forces on all particles. * Get the current forces on all particles.
* *
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -339,7 +339,7 @@ private: ...@@ -339,7 +339,7 @@ private:
HipArray rebuildNeighborList; HipArray rebuildNeighborList;
HipSort* blockSorter; HipSort* blockSorter;
hipEvent_t downloadCountEvent; hipEvent_t downloadCountEvent;
int* pinnedCountBuffer; unsigned int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs; std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
...@@ -349,8 +349,9 @@ private: ...@@ -349,8 +349,9 @@ private:
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList; bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, numTilesInBatch, maxExclusions; int startTileIndex, startBlockIndex, numBlocks, numTilesInBatch, maxExclusions;
int numForceThreadBlocks, forceThreadBlockSize, findInteractingBlocksThreadBlockSize, numAtoms, groupFlags; int numForceThreadBlocks, forceThreadBlockSize, findInteractingBlocksThreadBlockSize, numAtoms, groupFlags;
unsigned int maxTiles, maxSinglePairs, tilesAfterReorder;
long long numTiles; long long numTiles;
std::string kernelSource; std::string kernelSource;
}; };
......
...@@ -91,7 +91,9 @@ private: ...@@ -91,7 +91,9 @@ private:
long long* pinnedForceBuffer; long long* pinnedForceBuffer;
hipFunction_t sumKernel; hipFunction_t sumKernel;
hipEvent_t event; hipEvent_t event;
hipStream_t peerCopyStream; std::vector<hipEvent_t> peerCopyEvent;
std::vector<hipEvent_t> peerCopyEventLocal;
std::vector<hipStream_t> peerCopyStream;
}; };
/** /**
......
...@@ -126,8 +126,8 @@ public: ...@@ -126,8 +126,8 @@ public:
std::vector<HipContext*> contexts; std::vector<HipContext*> contexts;
std::vector<double> contextEnergy; std::vector<double> contextEnergy;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces; bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces;
int cmMotionFrequency; int cmMotionFrequency, computeForceCount;
int stepCount, computeForceCount; long long stepCount;
double time; double time;
std::map<std::string, std::string> propertyValues; std::map<std::string, std::string> propertyValues;
ThreadPool threads; ThreadPool threads;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2012-2019 Stanford University and the Authors. * * Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipArray.h" #include "HipArray.h"
#include "HipContext.h" #include "HipContext.h"
#include "openmm/common/ContextSelector.h"
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
...@@ -36,13 +37,13 @@ using namespace OpenMM; ...@@ -36,13 +37,13 @@ using namespace OpenMM;
HipArray::HipArray() : pointer(0), ownsMemory(false) { HipArray::HipArray() : pointer(0), ownsMemory(false) {
} }
HipArray::HipArray(HipContext& context, int size, int elementSize, const std::string& name) : pointer(0) { HipArray::HipArray(HipContext& context, size_t size, int elementSize, const std::string& name) : pointer(0) {
initialize(context, size, elementSize, name); initialize(context, size, elementSize, name);
} }
HipArray::~HipArray() { HipArray::~HipArray() {
if (pointer != 0 && ownsMemory && context->getContextIsValid()) { if (pointer != 0 && ownsMemory && context->getContextIsValid()) {
context->setAsCurrent(); ContextSelector selector(*context);
hipError_t result = hipFree(pointer); hipError_t result = hipFree(pointer);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
...@@ -52,7 +53,7 @@ HipArray::~HipArray() { ...@@ -52,7 +53,7 @@ HipArray::~HipArray() {
} }
} }
void HipArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) { void HipArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (this->pointer != 0) if (this->pointer != 0)
throw OpenMMException("HipArray has already been initialized"); throw OpenMMException("HipArray has already been initialized");
this->context = &dynamic_cast<HipContext&>(context); this->context = &dynamic_cast<HipContext&>(context);
...@@ -60,6 +61,7 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co ...@@ -60,6 +61,7 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
this->elementSize = elementSize; this->elementSize = elementSize;
this->name = name; this->name = name;
ownsMemory = true; ownsMemory = true;
ContextSelector selector(*this->context);
hipError_t result = hipMalloc(&pointer, size*elementSize); hipError_t result = hipMalloc(&pointer, size*elementSize);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
...@@ -68,11 +70,12 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co ...@@ -68,11 +70,12 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
} }
} }
void HipArray::resize(int size) { void HipArray::resize(size_t size) {
if (pointer == 0) if (pointer == 0)
throw OpenMMException("HipArray has not been initialized"); throw OpenMMException("HipArray has not been initialized");
if (!ownsMemory) if (!ownsMemory)
throw OpenMMException("Cannot resize an array that does not own its storage"); throw OpenMMException("Cannot resize an array that does not own its storage");
ContextSelector selector(*context);
hipError_t result = hipFree(pointer); hipError_t result = hipFree(pointer);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2023 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2023 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "HipProgram.h" #include "HipProgram.h"
#include "HipFFT3D.h" #include "HipFFT3D.h"
#include "openmm/common/ComputeArray.h" #include "openmm/common/ComputeArray.h"
#include "openmm/common/ContextSelector.h"
#include "SHA1.h" #include "SHA1.h"
#include "openmm/Platform.h" #include "openmm/Platform.h"
#include "openmm/System.h" #include "openmm/System.h"
...@@ -184,13 +185,15 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy ...@@ -184,13 +185,15 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
} }
contextIsValid = true; contextIsValid = true;
ContextSelector selector(*this);
if (contextIndex > 0) { if (contextIndex > 0) {
int canAccess; int canAccess;
CHECK_RESULT(hipDeviceCanAccessPeer(&canAccess, getDevice(), platformData.contexts[0]->getDevice())); CHECK_RESULT(hipDeviceCanAccessPeer(&canAccess, getDevice(), platformData.contexts[0]->getDevice()));
if (canAccess) { if (canAccess) {
platformData.contexts[0]->setAsCurrent(); {
CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0)); ContextSelector selector2(*platformData.contexts[0]);
setAsCurrent(); CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0));
}
CHECK_RESULT(hipDeviceEnablePeerAccess(platformData.contexts[0]->getDevice(), 0)); CHECK_RESULT(hipDeviceEnablePeerAccess(platformData.contexts[0]->getDevice(), 0));
} }
} }
...@@ -345,7 +348,7 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy ...@@ -345,7 +348,7 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
} }
HipContext::~HipContext() { HipContext::~HipContext() {
setAsCurrent(); pushAsCurrent();
for (auto force : forces) for (auto force : forces)
delete force; delete force;
for (auto listener : reorderListeners) for (auto listener : reorderListeners)
...@@ -366,28 +369,29 @@ HipContext::~HipContext() { ...@@ -366,28 +369,29 @@ HipContext::~HipContext() {
delete nonbonded; delete nonbonded;
for (auto module : loadedModules) for (auto module : loadedModules)
hipModuleUnload(module); hipModuleUnload(module);
popAsCurrent();
contextIsValid = false; contextIsValid = false;
} }
void HipContext::initialize() { void HipContext::initialize() {
hipSetDevice(device); ContextSelector selector(*this);
string errorMessage = "Error initializing Context"; string errorMessage = "Error initializing Context";
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()); int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) { if (useDoublePrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum"); energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum"); energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
} }
else { else {
energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<float>(*this, 1, "energySum"); energySum.initialize<float>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags()));
} }
...@@ -423,6 +427,29 @@ void HipContext::setAsCurrent() { ...@@ -423,6 +427,29 @@ void HipContext::setAsCurrent() {
hipSetDevice(device); hipSetDevice(device);
} }
void HipContext::pushAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPushCurrent's behavior
hipDevice_t outerScopeDevice;
hipGetDevice(&outerScopeDevice);
outerScopeDevices.push(outerScopeDevice);
if (device != outerScopeDevice) {
hipSetDevice(device);
}
}
}
void HipContext::popAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPopCurrent's behavior
hipDevice_t outerScopeDevice = outerScopeDevices.top();
outerScopeDevices.pop();
if (outerScopeDevice != device) {
hipSetDevice(outerScopeDevice);
}
}
}
string HipContext::getTempFileName() const { string HipContext::getTempFileName() const {
stringstream tempFileName; stringstream tempFileName;
tempFileName << tempDir; tempFileName << tempDir;
...@@ -784,12 +811,18 @@ double HipContext::reduceEnergy() { ...@@ -784,12 +811,18 @@ double HipContext::reduceEnergy() {
int bufferSize = energyBuffer.getSize(); int bufferSize = energyBuffer.getSize();
int workGroupSize = getMaxThreadBlockSize(); int workGroupSize = getMaxThreadBlockSize();
void* args[] = {&energyBuffer.getDevicePointer(), &energySum.getDevicePointer(), &bufferSize, &workGroupSize}; void* args[] = {&energyBuffer.getDevicePointer(), &energySum.getDevicePointer(), &bufferSize, &workGroupSize};
executeKernel(reduceEnergyKernel, args, workGroupSize, workGroupSize, workGroupSize*energyBuffer.getElementSize()); executeKernel(reduceEnergyKernel, args, workGroupSize*energySum.getSize(), workGroupSize, workGroupSize*energyBuffer.getElementSize());
energySum.download(pinnedBuffer); energySum.download(pinnedBuffer);
if (getUseDoublePrecision() || getUseMixedPrecision()) double result = 0;
return *((double*) pinnedBuffer); if (getUseDoublePrecision() || getUseMixedPrecision()) {
else for (int i = 0; i < energySum.getSize(); i++)
return *((float*) pinnedBuffer); result += ((double*) pinnedBuffer)[i];
}
else {
for (int i = 0; i < energySum.getSize(); i++)
result += ((float*) pinnedBuffer)[i];
}
return result;
} }
void HipContext::setCharges(const vector<double>& charges) { void HipContext::setCharges(const vector<double>& charges) {
...@@ -850,6 +883,13 @@ vector<int> HipContext::getDevicePrecedence() { ...@@ -850,6 +883,13 @@ vector<int> HipContext::getDevicePrecedence() {
return precedence; return precedence;
} }
/**
 * Build the flag word to pass to hipEventCreateWithFlags().
 *
 * @return hipEventDisableTiming, combined with hipEventBlockingSync when
 *         useBlockingSync is set
 */
unsigned int HipContext::getEventFlags() {
    unsigned int flags = hipEventDisableTiming;
    // These are bit flags, so combine them with bitwise OR rather than addition.
    if (useBlockingSync)
        flags |= hipEventBlockingSync;
    return flags;
}
unsigned int HipContext::getHostMallocFlags() { unsigned int HipContext::getHostMallocFlags() {
#ifdef WIN32 #ifdef WIN32
return hipHostMallocDefault; return hipHostMallocDefault;
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
using namespace OpenMM; using namespace OpenMM;
HipEvent::HipEvent(HipContext& context) : context(context), eventCreated(false) { HipEvent::HipEvent(HipContext& context) : context(context), eventCreated(false) {
hipError_t result = hipEventCreateWithFlags(&event, hipEventDisableTiming); hipError_t result = hipEventCreateWithFlags(&event, context.getEventFlags());
if (result != hipSuccess) if (result != hipSuccess)
throw OpenMMException("Error creating HIP event:"+HipContext::getErrorString(result)); throw OpenMMException("Error creating HIP event:"+HipContext::getErrorString(result));
eventCreated = true; eventCreated = true;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipIntegrationUtilities.h" #include "HipIntegrationUtilities.h"
#include "HipContext.h" #include "HipContext.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -41,13 +42,13 @@ using namespace std; ...@@ -41,13 +42,13 @@ using namespace std;
HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system), HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) { ccmaConvergedMemory(NULL) {
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, hipEventDisableTiming), "Error creating event for CCMA"); CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, context.getEventFlags()), "Error creating event for CCMA");
CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory"); CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory");
CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory"); CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
} }
HipIntegrationUtilities::~HipIntegrationUtilities() { HipIntegrationUtilities::~HipIntegrationUtilities() {
context.setAsCurrent(); ContextSelector selector(context);
if (ccmaConvergedMemory != NULL) { if (ccmaConvergedMemory != NULL) {
hipHostFree(ccmaConvergedMemory); hipHostFree(ccmaConvergedMemory);
hipEventDestroy(ccmaEvent); hipEventDestroy(ccmaEvent);
...@@ -67,6 +68,7 @@ HipArray& HipIntegrationUtilities::getStepSize() { ...@@ -67,6 +68,7 @@ HipArray& HipIntegrationUtilities::getStepSize() {
} }
void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) { void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) {
ContextSelector selector(context);
ComputeKernel settleKernel, shakeKernel, ccmaForceKernel; ComputeKernel settleKernel, shakeKernel, ccmaForceKernel;
if (constrainVelocities) { if (constrainVelocities) {
settleKernel = settleVelKernel; settleKernel = settleVelKernel;
...@@ -132,6 +134,7 @@ void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, dou ...@@ -132,6 +134,7 @@ void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, dou
} }
void HipIntegrationUtilities::distributeForcesFromVirtualSites() { void HipIntegrationUtilities::distributeForcesFromVirtualSites() {
ContextSelector selector(context);
if (numVsites > 0) { if (numVsites > 0) {
vsiteForceKernel->setArg(2, context.getLongForceBuffer()); vsiteForceKernel->setArg(2, context.getLongForceBuffer());
vsiteForceKernel->execute(numVsites); vsiteForceKernel->execute(numVsites);
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. * * Portions copyright (c) 2008-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "openmm/Context.h" #include "openmm/Context.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/NonbondedForceImpl.h" #include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/common/ContextSelector.h"
#include "CommonKernelSources.h" #include "CommonKernelSources.h"
#include "HipBondedUtilities.h" #include "HipBondedUtilities.h"
#include "HipExpressionUtilities.h" #include "HipExpressionUtilities.h"
...@@ -59,7 +60,7 @@ void HipCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -59,7 +60,7 @@ void HipCalcForcesAndEnergyKernel::initialize(const System& system) {
void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) { void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cu.setForcesValid(true); cu.setForcesValid(true);
cu.setAsCurrent(); ContextSelector selector(cu);
cu.clearAutoclearBuffers(); cu.clearAutoclearBuffers();
for (auto computation : cu.getPreComputations()) for (auto computation : cu.getPreComputations())
computation->computeForceAndEnergy(includeForces, includeEnergy, groups); computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
...@@ -72,7 +73,7 @@ void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -72,7 +73,7 @@ void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
} }
double HipCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) { double HipCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.setAsCurrent(); ContextSelector selector(cu);
cu.getBondedUtilities().computeInteractions(groups); cu.getBondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy); cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0; double sum = 0.0;
...@@ -99,8 +100,18 @@ void HipUpdateStateDataKernel::setTime(ContextImpl& context, double time) { ...@@ -99,8 +100,18 @@ void HipUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
ctx->setTime(time); ctx->setTime(time);
} }
/**
 * Return the step count currently held by `cu`.
 *
 * @param context  the ContextImpl this kernel belongs to (not read by this method)
 * @return the value reported by cu.getStepCount()
 */
long long HipUpdateStateDataKernel::getStepCount(const ContextImpl& context) const {
    const long long steps = cu.getStepCount();
    return steps;
}
void HipUpdateStateDataKernel::setStepCount(const ContextImpl& context, long long count) {
vector<HipContext*>& contexts = cu.getPlatformData().contexts;
for (auto ctx : contexts)
ctx->setStepCount(count);
}
void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) { void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
cu.setAsCurrent(); ContextSelector selector(cu);
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles); positions.resize(numParticles);
vector<float4> posCorrection; vector<float4> posCorrection;
...@@ -161,7 +172,7 @@ void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& ...@@ -161,7 +172,7 @@ void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>&
} }
void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) { void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) {
cu.setAsCurrent(); ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
...@@ -212,7 +223,7 @@ void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<V ...@@ -212,7 +223,7 @@ void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<V
} }
void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) { void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
cu.setAsCurrent(); ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles); velocities.resize(numParticles);
...@@ -237,7 +248,7 @@ void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& ...@@ -237,7 +248,7 @@ void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>&
} }
void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) { void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) {
cu.setAsCurrent(); ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) { if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
...@@ -270,8 +281,12 @@ void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector< ...@@ -270,8 +281,12 @@ void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<
} }
} }
/**
 * Compute time-shifted velocities by delegating to the integration utilities of `cu`.
 *
 * @param context     the ContextImpl this kernel belongs to (not read by this method)
 * @param timeShift   forwarded unchanged to the integration utilities
 * @param velocities  output vector, filled in by the integration utilities
 */
void HipUpdateStateDataKernel::computeShiftedVelocities(ContextImpl& context, double timeShift, vector<Vec3>& velocities) {
    auto&& utilities = cu.getIntegrationUtilities();
    utilities.computeShiftedVelocities(timeShift, velocities);
}
void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) { void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) {
cu.setAsCurrent(); ContextSelector selector(cu);
long long* force = (long long*) cu.getPinnedBuffer(); long long* force = (long long*) cu.getPinnedBuffer();
cu.getForce().download(force); cu.getForce().download(force);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
...@@ -284,6 +299,7 @@ void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& for ...@@ -284,6 +299,7 @@ void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& for
} }
void HipUpdateStateDataKernel::getEnergyParameterDerivatives(ContextImpl& context, map<string, double>& derivs) { void HipUpdateStateDataKernel::getEnergyParameterDerivatives(ContextImpl& context, map<string, double>& derivs) {
ContextSelector selector(cu);
const vector<string>& paramDerivNames = cu.getEnergyParamDerivNames(); const vector<string>& paramDerivNames = cu.getEnergyParamDerivNames();
int numDerivs = paramDerivNames.size(); int numDerivs = paramDerivNames.size();
if (numDerivs == 0) if (numDerivs == 0)
...@@ -337,15 +353,15 @@ void HipUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const ...@@ -337,15 +353,15 @@ void HipUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const
} }
void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) { void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cu.setAsCurrent(); ContextSelector selector(cu);
int version = 3; int version = 3;
stream.write((char*) &version, sizeof(int)); stream.write((char*) &version, sizeof(int));
int precision = (cu.getUseDoublePrecision() ? 2 : cu.getUseMixedPrecision() ? 1 : 0); int precision = (cu.getUseDoublePrecision() ? 2 : cu.getUseMixedPrecision() ? 1 : 0);
stream.write((char*) &precision, sizeof(int)); stream.write((char*) &precision, sizeof(int));
double time = cu.getTime(); double time = cu.getTime();
stream.write((char*) &time, sizeof(double)); stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount(); long long stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int)); stream.write((char*) &stepCount, sizeof(long long));
int stepsSinceReorder = cu.getStepsSinceReorder(); int stepsSinceReorder = cu.getStepsSinceReorder();
stream.write((char*) &stepsSinceReorder, sizeof(int)); stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cu.getPinnedBuffer(); char* buffer = (char*) cu.getPinnedBuffer();
...@@ -367,7 +383,7 @@ void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& s ...@@ -367,7 +383,7 @@ void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& s
} }
void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) { void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cu.setAsCurrent(); ContextSelector selector(cu);
int version; int version;
stream.read((char*) &version, sizeof(int)); stream.read((char*) &version, sizeof(int));
if (version != 3) if (version != 3)
...@@ -379,8 +395,9 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str ...@@ -379,8 +395,9 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str
throw OpenMMException("Checkpoint was created with a different numeric precision"); throw OpenMMException("Checkpoint was created with a different numeric precision");
double time; double time;
stream.read((char*) &time, sizeof(double)); stream.read((char*) &time, sizeof(double));
int stepCount, stepsSinceReorder; long long stepCount;
stream.read((char*) &stepCount, sizeof(int)); stream.read((char*) &stepCount, sizeof(long long));
int stepsSinceReorder;
stream.read((char*) &stepsSinceReorder, sizeof(int)); stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<HipContext*>& contexts = cu.getPlatformData().contexts; vector<HipContext*>& contexts = cu.getPlatformData().contexts;
for (auto ctx : contexts) { for (auto ctx : contexts) {
...@@ -408,6 +425,7 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str ...@@ -408,6 +425,7 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str
SimTKOpenMMUtilities::loadCheckpoint(stream); SimTKOpenMMUtilities::loadCheckpoint(stream);
for (auto listener : cu.getReorderListeners()) for (auto listener : cu.getReorderListeners())
listener->execute(); listener->execute();
cu.validateAtomOrder();
} }
class HipCalcNonbondedForceKernel::ForceInfo : public HipForceInfo { class HipCalcNonbondedForceKernel::ForceInfo : public HipForceInfo {
...@@ -448,7 +466,7 @@ public: ...@@ -448,7 +466,7 @@ public:
forceTemp.initialize<float4>(cu, cu.getNumAtoms(), "PmeForce"); forceTemp.initialize<float4>(cu, cu.getNumAtoms(), "PmeForce");
} }
float* getPosq() { float* getPosq() {
cu.setAsCurrent(); ContextSelector selector(cu);
cu.getPosq().download(posq); cu.getPosq().download(posq);
return (float*) &posq[0]; return (float*) &posq[0];
} }
...@@ -532,7 +550,7 @@ private: ...@@ -532,7 +550,7 @@ private:
}; };
HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() { HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() {
cu.setAsCurrent(); ContextSelector selector(cu);
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -551,7 +569,7 @@ HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() { ...@@ -551,7 +569,7 @@ HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() {
} }
void HipCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) { void HipCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
cu.setAsCurrent(); ContextSelector selector(cu);
int forceIndex; int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex) for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
; ;
...@@ -650,8 +668,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -650,8 +668,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0); hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets) if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1"; paramsDefines["HAS_OFFSETS"] = "1";
if (force.getNumParticleParameterOffsets() > 0)
paramsDefines["HAS_PARTICLE_OFFSETS"] = "1";
if (force.getNumExceptionParameterOffsets() > 0)
paramsDefines["HAS_EXCEPTION_OFFSETS"] = "1";
if (usePosqCharges) if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1"; paramsDefines["USE_POSQ_CHARGES"] = "1";
if (doLJPME)
paramsDefines["INCLUDE_LJPME_EXCEPTIONS"] = "1";
if (nonbondedMethod == Ewald) { if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters. // Compute the Ewald parameters.
...@@ -703,8 +727,16 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -703,8 +727,16 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0"; defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME) if (doLJPME) {
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha); defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
}
if (cu.getContextIndex() == 0) { if (cu.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1"; paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI)); paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
...@@ -766,13 +798,6 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -766,13 +798,6 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha)); pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1"; pmeDefines["USE_LJPME"] = "1";
pmeDefines["CHARGE_FROM_SIGEPS"] = "1"; pmeDefines["CHARGE_FROM_SIGEPS"] = "1";
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
module = cu.createModule(HipKernelSources::vectorOps+CommonKernelSources::pme, pmeDefines); module = cu.createModule(HipKernelSources::vectorOps+CommonKernelSources::pme, pmeDefines);
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge"); pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomGridIndex"); pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
...@@ -813,8 +838,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -813,8 +838,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
if (usePmeStream) { if (usePmeStream) {
CHECK_RESULT(hipStreamCreateWithFlags(&pmeStream, hipStreamNonBlocking), "Error creating stream for NonbondedForce"); CHECK_RESULT(hipStreamCreateWithFlags(&pmeStream, hipStreamNonBlocking), "Error creating stream for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&pmeSyncEvent, hipEventDisableTiming), "Error creating event for NonbondedForce"); CHECK_RESULT(hipEventCreateWithFlags(&pmeSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&paramsSyncEvent, hipEventDisableTiming), "Error creating event for NonbondedForce"); CHECK_RESULT(hipEventCreateWithFlags(&paramsSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
int recipForceGroup = force.getReciprocalSpaceForceGroup(); int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0) if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup(); recipForceGroup = force.getForceGroup();
...@@ -939,7 +964,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -939,7 +964,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0"; replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0";
if (doLJPME) if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha); replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup()); if (force.getIncludeDirectSpace())
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup());
} }
} }
...@@ -959,7 +985,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -959,7 +985,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
replacements["CHARGE1"] = prefix+"charge1"; replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2"; replacements["CHARGE2"] = prefix+"charge2";
} }
if (hasCoulomb) if (hasCoulomb && !usePosqCharges)
cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer())); cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon"); sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) { if (hasLJ) {
...@@ -968,7 +994,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -968,7 +994,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer())); cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
} }
source = cu.replaceStrings(source, replacements); source = cu.replaceStrings(source, replacements);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true); if (force.getIncludeDirectSpace())
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
// Initialize the exceptions. // Initialize the exceptions.
...@@ -993,13 +1020,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -993,13 +1020,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup()); if (force.getIncludeDirectSpace())
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
} }
// Initialize parameter offsets. // Initialize parameter offsets.
vector<vector<float4> > particleOffsetVec(force.getNumParticles()); vector<vector<float4> > particleOffsetVec(force.getNumParticles());
vector<vector<float4> > exceptionOffsetVec(force.getNumExceptions()); vector<vector<float4> > exceptionOffsetVec(numExceptions);
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) { for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param; string param;
int particle; int particle;
...@@ -1020,6 +1048,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -1020,6 +1048,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
int exception; int exception;
double charge, sigma, epsilon; double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon); force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
int index = exceptionIndex[exception];
if (index < startIndex || index >= endIndex)
continue;
auto paramPos = find(paramNames.begin(), paramNames.end(), param); auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex; int paramIndex;
if (paramPos == paramNames.end()) { if (paramPos == paramNames.end()) {
...@@ -1028,13 +1059,11 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -1028,13 +1059,11 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
} }
else else
paramIndex = paramPos-paramNames.begin(); paramIndex = paramPos-paramNames.begin();
exceptionOffsetVec[exceptionIndex[exception]].push_back(make_float4(charge, sigma, epsilon, paramIndex)); exceptionOffsetVec[index-startIndex].push_back(make_float4(charge, sigma, epsilon, paramIndex));
} }
paramValues.resize(paramNames.size(), 0.0); paramValues.resize(paramNames.size(), 0.0);
particleParamOffsets.initialize<float4>(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets"); particleParamOffsets.initialize<float4>(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
exceptionParamOffsets.initialize<float4>(cu, max(force.getNumExceptionParameterOffsets(), 1), "exceptionParamOffsets");
particleOffsetIndices.initialize<int>(cu, cu.getPaddedNumAtoms()+1, "particleOffsetIndices"); particleOffsetIndices.initialize<int>(cu, cu.getPaddedNumAtoms()+1, "particleOffsetIndices");
exceptionOffsetIndices.initialize<int>(cu, force.getNumExceptions()+1, "exceptionOffsetIndices");
vector<int> particleOffsetIndicesVec, exceptionOffsetIndicesVec; vector<int> particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector<float4> p, e; vector<float4> p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) { for (int i = 0; i < particleOffsetVec.size(); i++) {
...@@ -1054,7 +1083,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -1054,7 +1083,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
particleParamOffsets.upload(p); particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec); particleOffsetIndices.upload(particleOffsetIndicesVec);
} }
if (force.getNumExceptionParameterOffsets() > 0) { exceptionParamOffsets.initialize<float4>(cu, max((int) e.size(), 1), "exceptionParamOffsets");
exceptionOffsetIndices.initialize<int>(cu, exceptionOffsetIndicesVec.size(), "exceptionOffsetIndices");
if (e.size() > 0) {
exceptionParamOffsets.upload(e); exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec); exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
} }
...@@ -1075,6 +1106,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -1075,6 +1106,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// Update particle and exception parameters. // Update particle and exception parameters.
ContextSelector selector(cu);
bool paramChanged = false; bool paramChanged = false;
for (int i = 0; i < paramNames.size(); i++) { for (int i = 0; i < paramNames.size(); i++) {
double value = context.getParameter(paramNames[i]); double value = context.getParameter(paramNames[i]);
...@@ -1089,7 +1121,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -1089,7 +1121,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
} }
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0); double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (recomputeParams || hasOffsets) { if (recomputeParams || hasOffsets) {
bool computeSelfEnergy = (includeEnergy && includeReciprocal); int computeSelfEnergy = (includeEnergy && includeReciprocal);
int numAtoms = cu.getPaddedNumAtoms(); int numAtoms = cu.getPaddedNumAtoms();
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms, vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(), &baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
...@@ -1258,7 +1290,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -1258,7 +1290,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) { void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
// Make sure the new parameters are acceptable. // Make sure the new parameters are acceptable.
cu.setAsCurrent(); ContextSelector selector(cu);
if (force.getNumParticles() != cu.getNumAtoms()) if (force.getNumParticles() != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed"); throw OpenMMException("updateParametersInContext: The number of particles has changed");
if (!hasCoulomb || !hasLJ) { if (!hasCoulomb || !hasLJ) {
...@@ -1271,20 +1303,28 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1271,20 +1303,28 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0"); throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0");
} }
} }
set<int> exceptionsWithOffsets;
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
exceptionsWithOffsets.insert(exception);
}
vector<int> exceptions; vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) { for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2; int particle1, particle2;
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon); force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()]) if (chargeProd != 0.0 || epsilon != 0.0 || exceptionsWithOffsets.find(i) != exceptionsWithOffsets.end())
exceptions.push_back(i); exceptions.push_back(i);
else if (chargeProd != 0.0 || epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
} }
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts; int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts; int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex; int numExceptions = endIndex-startIndex;
if (numExceptions != exceptionAtoms.size())
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
// Record the per-particle parameters. // Record the per-particle parameters.
...@@ -1300,11 +1340,13 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1300,11 +1340,13 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the exceptions. // Record the exceptions.
if (numExceptions > 0) { if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<float4> baseExceptionParamsVec(numExceptions); vector<float4> baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) { for (int i = 0; i < numExceptions; i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon); force.getExceptionParameters(exceptions[startIndex+i], particle1, particle2, chargeProd, sigma, epsilon);
if (make_pair(particle1, particle2) != exceptionAtoms[i])
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0); baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0);
} }
baseExceptionParams.upload(baseExceptionParamsVec); baseExceptionParams.upload(baseExceptionParamsVec);
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -439,6 +439,10 @@ void HipNonbondedUtilities::computeInteractions(int forceGroups, bool includeFor ...@@ -439,6 +439,10 @@ void HipNonbondedUtilities::computeInteractions(int forceGroups, bool includeFor
bool HipNonbondedUtilities::updateNeighborListSize() { bool HipNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return false; return false;
if (context.getStepsSinceReorder() == 0)
tilesAfterReorder = pinnedCountBuffer[0];
else if (context.getStepsSinceReorder() > 25 && pinnedCountBuffer[0] > 1.1*tilesAfterReorder)
context.forceReorder();
if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs) if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs)
return false; return false;
...@@ -446,12 +450,13 @@ bool HipNonbondedUtilities::updateNeighborListSize() { ...@@ -446,12 +450,13 @@ bool HipNonbondedUtilities::updateNeighborListSize() {
// this from happening in the future. // this from happening in the future.
if (pinnedCountBuffer[0] > maxTiles) { if (pinnedCountBuffer[0] > maxTiles) {
maxTiles = (int) (1.2*pinnedCountBuffer[0]); maxTiles = (unsigned int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
maxTiles = totalTiles; maxTiles = totalTiles;
interactingTiles.resize(maxTiles); interactingTiles.resize(maxTiles);
interactingAtoms.resize(HipContext::TileSize*maxTiles); interactingAtoms.resize(HipContext::TileSize*(size_t) maxTiles);
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles.getDevicePointer(); forceArgs[7] = &interactingTiles.getDevicePointer();
findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer(); findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer();
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2019 Stanford University and the Authors. * * Portions copyright (c) 2011-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipParallelKernels.h" #include "HipParallelKernels.h"
#include "HipKernelSources.h" #include "HipKernelSources.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -70,7 +71,7 @@ public: ...@@ -70,7 +71,7 @@ public:
void execute() { void execute() {
// Copy coordinates over to this device and execute the kernel. // Copy coordinates over to this device and execute the kernel.
cu.setAsCurrent(); ContextSelector selector(cu);
if (cu.getContextIndex() > 0) { if (cu.getContextIndex() > 0) {
hipStreamWaitEvent(cu.getCurrentStream(), event, 0); hipStreamWaitEvent(cu.getCurrentStream(), event, 0);
if (!cu.getPlatformData().peerAccessSupported) if (!cu.getPlatformData().peerAccessSupported)
...@@ -94,13 +95,16 @@ private: ...@@ -94,13 +95,16 @@ private:
class HipParallelCalcForcesAndEnergyKernel::FinishComputationTask : public HipContext::WorkTask { class HipParallelCalcForcesAndEnergyKernel::FinishComputationTask : public HipContext::WorkTask {
public: public:
FinishComputationTask(ContextImpl& context, HipContext& cu, HipCalcForcesAndEnergyKernel& kernel, FinishComputationTask(ContextImpl& context, HipContext& cu, HipCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces, bool& valid, int2& interactionCount) : bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces,
bool& valid, int2& interactionCount, hipStream_t stream, hipEvent_t event, hipEvent_t localEvent) :
context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy), context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount) { completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount),
stream(stream), event(event), localEvent(localEvent) {
} }
void execute() { void execute() {
// Execute the kernel, then download forces. // Execute the kernel, then download forces.
ContextSelector selector(cu);
energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid); energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid);
if (cu.getComputeForceCount() < 200) { if (cu.getComputeForceCount() < 200) {
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation. // Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
...@@ -110,13 +114,16 @@ public: ...@@ -110,13 +114,16 @@ public:
} }
if (includeForce) { if (includeForce) {
if (cu.getContextIndex() > 0) { if (cu.getContextIndex() > 0) {
hipEventRecord(localEvent, cu.getCurrentStream());
hipStreamWaitEvent(stream, localEvent, 0);
int numAtoms = cu.getPaddedNumAtoms(); int numAtoms = cu.getPaddedNumAtoms();
if (cu.getPlatformData().peerAccessSupported) { if (cu.getPlatformData().peerAccessSupported) {
int numBytes = numAtoms*3*sizeof(long long); int numBytes = numAtoms*3*sizeof(long long);
int offset = (cu.getContextIndex()-1)*numBytes; int offset = (cu.getContextIndex()-1)*numBytes;
HipContext& context0 = *cu.getPlatformData().contexts[0]; HipContext& context0 = *cu.getPlatformData().contexts[0];
CHECK_RESULT(hipMemcpy(static_cast<char*>(contextForces.getDevicePointer())+offset, CHECK_RESULT(hipMemcpyAsync(static_cast<char*>(contextForces.getDevicePointer())+offset,
cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice), "Error copying forces"); cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice, stream), "Error copying forces");
hipEventRecord(event, stream);
} }
else else
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]); cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
...@@ -140,6 +147,9 @@ private: ...@@ -140,6 +147,9 @@ private:
HipArray& contextForces; HipArray& contextForces;
bool& valid; bool& valid;
int2& interactionCount; int2& interactionCount;
hipStream_t stream;
hipEvent_t event;
hipEvent_t localEvent;
}; };
HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, HipPlatform::PlatformData& data) : HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, HipPlatform::PlatformData& data) :
...@@ -150,20 +160,25 @@ HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(strin ...@@ -150,20 +160,25 @@ HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(strin
} }
HipParallelCalcForcesAndEnergyKernel::~HipParallelCalcForcesAndEnergyKernel() { HipParallelCalcForcesAndEnergyKernel::~HipParallelCalcForcesAndEnergyKernel() {
data.contexts[0]->setAsCurrent(); ContextSelector selector(*data.contexts[0]);
if (pinnedPositionBuffer != NULL) if (pinnedPositionBuffer != NULL)
hipHostFree(pinnedPositionBuffer); hipHostFree(pinnedPositionBuffer);
if (pinnedForceBuffer != NULL) if (pinnedForceBuffer != NULL)
hipHostFree(pinnedForceBuffer); hipHostFree(pinnedForceBuffer);
hipEventDestroy(event); hipEventDestroy(event);
hipStreamDestroy(peerCopyStream); for (int i = 0; i < peerCopyEvent.size(); i++)
hipEventDestroy(peerCopyEvent[i]);
for (int i = 0; i < peerCopyEventLocal.size(); i++)
hipEventDestroy(peerCopyEventLocal[i]);
for (int i = 0; i < peerCopyStream.size(); i++)
hipStreamDestroy(peerCopyStream[i]);
if (interactionCounts != NULL) if (interactionCounts != NULL)
hipHostFree(interactionCounts); hipHostFree(interactionCounts);
} }
void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) { void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
HipContext& cu = *data.contexts[0]; HipContext& cu = *data.contexts[0];
cu.setAsCurrent(); ContextSelector selector(cu);
hipModule_t module = cu.createModule(HipKernelSources::parallel); hipModule_t module = cu.createModule(HipKernelSources::parallel);
sumKernel = cu.getKernel(module, "sumForces"); sumKernel = cu.getKernel(module, "sumForces");
int numContexts = data.contexts.size(); int numContexts = data.contexts.size();
...@@ -171,14 +186,25 @@ void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -171,14 +186,25 @@ void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
getKernel(i).initialize(system); getKernel(i).initialize(system);
for (int i = 0; i < numContexts; i++) for (int i = 0; i < numContexts; i++)
contextNonbondedFractions[i] = 1/(double) numContexts; contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(hipEventCreateWithFlags(&event, 0), "Error creating event"); CHECK_RESULT(hipEventCreateWithFlags(&event, cu.getEventFlags()), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream, hipStreamNonBlocking), "Error creating stream"); peerCopyEvent.resize(numContexts);
peerCopyEventLocal.resize(numContexts);
peerCopyStream.resize(numContexts);
for (int i = 0; i < numContexts; i++) {
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEvent[i], cu.getEventFlags()), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream[i], hipStreamNonBlocking), "Error creating stream");
}
for (int i = 0; i < numContexts; i++) {
HipContext& cuLocal = *data.contexts[i];
ContextSelector selectorLocal(cuLocal);
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEventLocal[i], cu.getEventFlags()), "Error creating event");
}
CHECK_RESULT(hipHostMalloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer"); CHECK_RESULT(hipHostMalloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer");
} }
void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
HipContext& cu = *data.contexts[0]; HipContext& cu = *data.contexts[0];
cu.setAsCurrent(); ContextSelector selector(cu);
if (!contextForces.isInitialized()) { if (!contextForces.isInitialized()) {
contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces"); contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
CHECK_RESULT(hipHostMalloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), hipHostMallocPortable), "Error allocating pinned memory"); CHECK_RESULT(hipHostMalloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), hipHostMallocPortable), "Error allocating pinned memory");
...@@ -194,36 +220,44 @@ void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context ...@@ -194,36 +220,44 @@ void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context
else { else {
int numBytes = cu.getPosq().getSize()*cu.getPosq().getElementSize(); int numBytes = cu.getPosq().getSize()*cu.getPosq().getElementSize();
hipEventRecord(event, cu.getCurrentStream()); hipEventRecord(event, cu.getCurrentStream());
hipStreamWaitEvent(peerCopyStream, event, 0); for (int i = 1; i < (int) data.contexts.size(); i++) {
for (int i = 1; i < (int) data.contexts.size(); i++) hipStreamWaitEvent(peerCopyStream[i], event, 0);
CHECK_RESULT(hipMemcpyAsync( CHECK_RESULT(hipMemcpyAsync(
data.contexts[i]->getPosq().getDevicePointer(), data.contexts[i]->getPosq().getDevicePointer(),
cu.getPosq().getDevicePointer(), numBytes, cu.getPosq().getDevicePointer(), numBytes,
hipMemcpyDeviceToDevice, peerCopyStream), "Error copying positions"); hipMemcpyDeviceToDevice, peerCopyStream[i]), "Error copying positions");
hipEventRecord(event, peerCopyStream); hipEventRecord(peerCopyEvent[i], peerCopyStream[i]);
}
} }
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0; data.contextEnergy[i] = 0.0;
HipContext& cu = *data.contexts[i]; HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread(); ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event, interactionCounts[i])); hipEvent_t waitEvent = (cu.getPlatformData().peerAccessSupported ? peerCopyEvent[i] : event);
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, waitEvent, interactionCounts[i]));
} }
data.syncContexts();
} }
double HipParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) { double HipParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
HipContext& cu = *data.contexts[i]; HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread(); ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, contextForces, valid, interactionCounts[i])); thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i],
pinnedForceBuffer, contextForces, valid, interactionCounts[i], peerCopyStream[i], peerCopyEvent[i], peerCopyEventLocal[i]));
} }
data.syncContexts(); data.syncContexts();
HipContext& cu = *data.contexts[0];
ContextSelector selector(cu);
if (cu.getPlatformData().peerAccessSupported)
for (int i = 1; i < data.contexts.size(); i++)
hipStreamWaitEvent(cu.getCurrentStream(), peerCopyEvent[i], 0);
double energy = 0.0; double energy = 0.0;
for (int i = 0; i < (int) data.contextEnergy.size(); i++) for (int i = 0; i < (int) data.contextEnergy.size(); i++)
energy += data.contextEnergy[i]; energy += data.contextEnergy[i];
if (includeForce && valid) { if (includeForce && valid) {
// Sum the forces from all devices. // Sum the forces from all devices.
HipContext& cu = *data.contexts[0];
if (!cu.getPlatformData().peerAccessSupported) if (!cu.getPlatformData().peerAccessSupported)
contextForces.upload(pinnedForceBuffer, false); contextForces.upload(pinnedForceBuffer, false);
int bufferSize = 3*cu.getPaddedNumAtoms(); int bufferSize = 3*cu.getPaddedNumAtoms();
......
...@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re ...@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
extern __shared__ mixed tempBuffer[]; extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x; const unsigned int thread = threadIdx.x;
mixed sum = 0; mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x) for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x)
sum += energyBuffer[index]; sum += energyBuffer[index];
tempBuffer[thread] = sum; tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) { for (int i = 1; i < workGroupSize; i *= 2) {
...@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re ...@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
*result = tempBuffer[0]; result[blockIdx.x] = tempBuffer[0];
} }
/** /**
......
...@@ -41,4 +41,6 @@ OpenMM::HipPlatform platform; ...@@ -41,4 +41,6 @@ OpenMM::HipPlatform platform;
void initializeTests(int argc, char* argv[]) { void initializeTests(int argc, char* argv[]) {
if (argc > 1) if (argc > 1)
platform.setPropertyDefaultValue("Precision", std::string(argv[1])); platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
if (argc > 2)
platform.setPropertyDefaultValue("DeviceIndex", std::string(argv[2]));
} }
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -38,12 +38,16 @@ ...@@ -38,12 +38,16 @@
#include "HipArray.h" #include "HipArray.h"
#include "HipContext.h" #include "HipContext.h"
#include "HipFFT3D.h" #include "HipFFT3D.h"
#include "fftpack.h"
#include "sfmt/SFMT.h" #include "sfmt/SFMT.h"
#include "openmm/System.h" #include "openmm/System.h"
#include <complex>
#include <iostream> #include <iostream>
#include <cmath> #include <cmath>
#include <set> #include <set>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -67,19 +71,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e ...@@ -67,19 +71,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
vector<Real2> original(xsize*ysize*zsize); vector<Real2> original(xsize*ysize*zsize);
vector<t_complex> reference(original.size()); vector<complex<double>> reference(original.size());
for (int i = 0; i < (int) original.size(); i++) { for (int i = 0; i < (int) original.size(); i++) {
Real2 value; Real2 value;
value.x = (float) genrand_real2(sfmt); value.x = (float) genrand_real2(sfmt);
value.y = (float) genrand_real2(sfmt); value.y = (float) genrand_real2(sfmt);
original[i] = value; original[i] = value;
reference[i] = t_complex(value.x, value.y); reference[i] = complex<double>(value.x, value.y);
} }
for (int i = 0; i < (int) reference.size(); i++) { for (int i = 0; i < (int) reference.size(); i++) {
if (realToComplex) if (realToComplex)
reference[i] = t_complex(i%2 == 0 ? original[i/2].x : original[i/2].y, 0); reference[i] = complex<double>(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
else else
reference[i] = t_complex(original[i].x, original[i].y); reference[i] = complex<double>(original[i].x, original[i].y);
} }
HipArray grid1(context, original.size(), sizeof(Real2), "grid1"); HipArray grid1(context, original.size(), sizeof(Real2), "grid1");
HipArray grid2(context, original.size(), sizeof(Real2), "grid2"); HipArray grid2(context, original.size(), sizeof(Real2), "grid2");
...@@ -91,19 +95,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e ...@@ -91,19 +95,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
fft.execFFT(true); fft.execFFT(true);
vector<Real2> result; vector<Real2> result;
grid2.download(result); grid2.download(result);
fftpack_t plan; vector<size_t> shape = {(size_t) xsize, (size_t) ysize, (size_t) zsize};
fftpack_init_3d(&plan, xsize, ysize, zsize); vector<size_t> axes = {0, 1, 2};
fftpack_exec_3d(plan, FFTPACK_FORWARD, &reference[0], &reference[0]); vector<ptrdiff_t> stride = {(ptrdiff_t) (ysize*zsize*sizeof(complex<double>)),
(ptrdiff_t) (zsize*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, reference.data(), reference.data(), 1.0);
int outputZSize = (realToComplex ? zsize/2+1 : zsize); int outputZSize = (realToComplex ? zsize/2+1 : zsize);
for (int x = 0; x < xsize; x++) for (int x = 0; x < xsize; x++)
for (int y = 0; y < ysize; y++) for (int y = 0; y < ysize; y++)
for (int z = 0; z < outputZSize; z++) { for (int z = 0; z < outputZSize; z++) {
int index1 = x*ysize*zsize + y*zsize + z; int index1 = x*ysize*zsize + y*zsize + z;
int index2 = x*ysize*outputZSize + y*outputZSize + z; int index2 = x*ysize*outputZSize + y*outputZSize + z;
ASSERT_EQUAL_TOL(reference[index1].re, result[index2].x, 1e-3 * eps); ASSERT_EQUAL_TOL(reference[index1].real(), result[index2].x, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].im, result[index2].y, 1e-3 * eps); ASSERT_EQUAL_TOL(reference[index1].imag(), result[index2].y, 1e-3 * eps);
} }
fftpack_destroy(plan);
// Perform a backward transform and see if we get the original values. // Perform a backward transform and see if we get the original values.
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. * * Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
...@@ -50,13 +50,19 @@ void testParallelComputation(NonbondedForce::NonbondedMethod method) { ...@@ -50,13 +50,19 @@ void testParallelComputation(NonbondedForce::NonbondedMethod method) {
vector<Vec3> positions(numParticles); vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt)); positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt));
force->addGlobalParameter("scale", 0.5);
for (int i = 0; i < numParticles; ++i) for (int i = 0; i < numParticles; ++i)
for (int j = 0; j < i; ++j) { for (int j = 0; j < i; ++j) {
Vec3 delta = positions[i]-positions[j]; Vec3 delta = positions[i]-positions[j];
if (delta.dot(delta) < 0.1) if (delta.dot(delta) < 0.1) {
force->addException(i, j, 0, 1, 0); force->addException(i, j, 0, 1, 0);
}
else if (delta.dot(delta) < 0.2) {
int index = force->addException(i, j, 0.5, 1, 1.0);
force->addExceptionParameterOffset("scale", index, 0.5, 0.4, 0.3);
}
} }
// Create two contexts, one with a single device and one with two devices. // Create two contexts, one with a single device and one with two devices.
VerletIntegrator integrator1(0.01); VerletIntegrator integrator1(0.01);
...@@ -179,6 +185,7 @@ void runPlatformTests() { ...@@ -179,6 +185,7 @@ void runPlatformTests() {
testParallelComputation(NonbondedForce::NoCutoff); testParallelComputation(NonbondedForce::NoCutoff);
testParallelComputation(NonbondedForce::Ewald); testParallelComputation(NonbondedForce::Ewald);
testParallelComputation(NonbondedForce::PME); testParallelComputation(NonbondedForce::PME);
testParallelComputation(NonbondedForce::LJPME);
testReordering(); testReordering();
testDeterministicForces(); testDeterministicForces();
if (canRunHugeTest()) if (canRunHugeTest())
......
...@@ -60,6 +60,7 @@ void testGaussian() { ...@@ -60,6 +60,7 @@ void testGaussian() {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.setAsCurrent();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
HipArray& random = context.getIntegrationUtilities().getRandom(); HipArray& random = context.getIntegrationUtilities().getRandom();
context.getIntegrationUtilities().prepareRandomNumbers(random.getSize()); context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
......
...@@ -70,6 +70,7 @@ void verifySorting(vector<float> array, bool uniform) { ...@@ -70,6 +70,7 @@ void verifySorting(vector<float> array, bool uniform) {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.setAsCurrent();
HipArray data(context, array.size(), 4, "sortData"); HipArray data(context, array.size(), 4, "sortData");
data.upload(array); data.upload(array);
HipSort sort(context, new SortTrait(), array.size(), uniform); HipSort sort(context, new SortTrait(), array.size(), uniform);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2020 Stanford University and the Authors. * * Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2021 Advanced Micro Devices, Inc. * * Portions copyright (c) 2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Mark Friedrichs * * Authors: Peter Eastman, Mark Friedrichs *
* Contributors: * * Contributors: *
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define _USE_MATH_DEFINES // Needed to get M_PI #define _USE_MATH_DEFINES // Needed to get M_PI
#endif #endif
#include "AmoebaHipKernels.h" #include "AmoebaHipKernels.h"
#include "openmm/common/ContextSelector.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h" #include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h" #include "openmm/internal/AmoebaMultipoleForceImpl.h"
...@@ -56,7 +57,7 @@ using namespace std; ...@@ -56,7 +57,7 @@ using namespace std;
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() { HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
cc.setAsCurrent(); ContextSelector selector(cc);
if (fft != NULL) if (fft != NULL)
delete fft; delete fft;
} }
...@@ -64,6 +65,7 @@ HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() { ...@@ -64,6 +65,7 @@ HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
void HipCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) { void HipCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) {
CommonCalcAmoebaMultipoleForceKernel::initialize(system, force); CommonCalcAmoebaMultipoleForceKernel::initialize(system, force);
if (usePME) { if (usePME) {
ContextSelector selector(cc);
HipArray& grid1 = cu.unwrap(pmeGrid1); HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2); HipArray& grid2 = cu.unwrap(pmeGrid2);
fft = cu.createFFT(gridSizeX, gridSizeY, gridSizeZ, false, cu.getCurrentStream(), grid1, grid2); fft = cu.createFFT(gridSizeX, gridSizeY, gridSizeZ, false, cu.getCurrentStream(), grid1, grid2);
...@@ -79,7 +81,7 @@ void HipCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) { ...@@ -79,7 +81,7 @@ void HipCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) {
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() { HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
cc.setAsCurrent(); ContextSelector selector(cc);
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -91,6 +93,7 @@ HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() { ...@@ -91,6 +93,7 @@ HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
void HipCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) { void HipCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) {
CommonCalcHippoNonbondedForceKernel::initialize(system, force); CommonCalcHippoNonbondedForceKernel::initialize(system, force);
if (usePME) { if (usePME) {
ContextSelector selector(cc);
sort = new HipSort(cu, new SortTrait(), cc.getNumAtoms()); sort = new HipSort(cu, new SortTrait(), cc.getNumAtoms());
HipArray& grid1 = cu.unwrap(pmeGrid1); HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2); HipArray& grid2 = cu.unwrap(pmeGrid2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment