Unverified Commit ecc2d258 authored by Anton Gorenko's avatar Anton Gorenko
Browse files

Port changes from the main repository

Use cuCtxPushCurrent() and cuCtxPopCurrent() for selecting CUDA context

    https://github.com/openmm/openmm/pull/3258

Fixed uninitialized memory access

    https://github.com/openmm/openmm/issues/3392
    https://github.com/openmm/openmm/pull/3399

Fixed potential invalid memory access

    See https://github.com/openmm/openmm/pull/3428

Improved temperature reporting for Drude particles

    https://github.com/openmm/openmm/pull/3486
    https://github.com/openmm/openmm/commit/a5e42f5

Fixed race condition with multiple GPUs

    https://github.com/openmm/openmm/commit/6fb1c8a41edff980862750bc086f6a204eb50941

Use blocking sync when creating events

    https://github.com/openmm/openmm/commit/fe21d5ee4f14673a4ea38b7244991772a64ceec2

Very minor optimizations

    https://github.com/openmm/openmm/commit/109f6b2535da4e0c0dd88007d6ca06b4add2ce81

Use PocketFFT

    https://github.com/openmm/openmm/commit/1dac981a63300a2a53a7925f570995914f7163ed

Improved logic for deciding when to reorder atoms

    https://github.com/openmm/openmm/commit/48664a1f1a4490a4dabc277757545ac070e7b898

Ensure valid atom order after loading a checkpoint

    https://github.com/openmm/openmm/commit/a056d5a3754e193105409afa12c9f0c9a2d972a2

Improve performance running on multiple GPUs

    https://github.com/openmm/openmm/commit/0c82c2647de98da5c6dab7bf7a7b8b19705aadc0

Fixed errors when running on multiple GPUs

    https://github.com/openmm/openmm/commit/ed9df876d43c037c08d4762721e73e5caae086d9

Optimized reducing energy

    https://github.com/openmm/openmm/commit/2975f44
parent f717ed89
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -56,7 +56,7 @@ public: ...@@ -56,7 +56,7 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
static HipArray* create(HipContext& context, int size, const std::string& name) { static HipArray* create(HipContext& context, size_t size, const std::string& name) {
return new HipArray(context, size, sizeof(T), name); return new HipArray(context, size, sizeof(T), name);
} }
/** /**
...@@ -72,7 +72,7 @@ public: ...@@ -72,7 +72,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
HipArray(HipContext& context, int size, int elementSize, const std::string& name); HipArray(HipContext& context, size_t size, int elementSize, const std::string& name);
~HipArray(); ~HipArray();
/** /**
* Initialize this object. * Initialize this object.
...@@ -82,7 +82,7 @@ public: ...@@ -82,7 +82,7 @@ public:
* @param elementSize the size of each element in bytes * @param elementSize the size of each element in bytes
* @param name the name of the array * @param name the name of the array
*/ */
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name); void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/** /**
* Initialize this object. The template argument is the data type of each array element. * Initialize this object. The template argument is the data type of each array element.
* *
...@@ -91,13 +91,13 @@ public: ...@@ -91,13 +91,13 @@ public:
* @param name the name of the array * @param name the name of the array
*/ */
template <class T> template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) { void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name); initialize(context, size, sizeof(T), name);
} }
/** /**
* Recreate the internal storage to have a different size. * Recreate the internal storage to have a different size.
*/ */
void resize(int size); void resize(size_t size);
/** /**
* Get whether this array has been initialized. * Get whether this array has been initialized.
*/ */
...@@ -107,7 +107,7 @@ public: ...@@ -107,7 +107,7 @@ public:
/** /**
* Get the number of elements in the array. * Get the number of elements in the array.
*/ */
int getSize() const { size_t getSize() const {
return size; return size;
} }
/** /**
...@@ -183,7 +183,8 @@ public: ...@@ -183,7 +183,8 @@ public:
private: private:
HipContext* context; HipContext* context;
hipDeviceptr_t pointer; hipDeviceptr_t pointer;
int size, elementSize; size_t size;
int elementSize;
bool ownsMemory; bool ownsMemory;
std::string name; std::string name;
}; };
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <map> #include <map>
#include <stack>
#include <string> #include <string>
#include <utility> #include <utility>
#define __CL_ENABLE_EXCEPTIONS #define __CL_ENABLE_EXCEPTIONS
...@@ -99,10 +100,20 @@ public: ...@@ -99,10 +100,20 @@ public:
return contextIsValid; return contextIsValid;
} }
/** /**
* Set the hipCtx_t associated with this object to be the current context. If the context is not * Set the device associated with this object to be the current device. If the context is not
* valid, this returns without doing anything. * valid, this returns without doing anything.
*/ */
void setAsCurrent(); void setAsCurrent();
/**
* Push the device associated with this object to be the current device. If the context is not
* valid, this returns without doing anything.
*/
void pushAsCurrent();
/**
* Pop the device associated with this object off the stack of contexts. If the context is not
* valid, this returns without doing anything.
*/
void popAsCurrent();
/** /**
* Get the hipDevice_t associated with this object. * Get the hipDevice_t associated with this object.
*/ */
...@@ -582,6 +593,10 @@ public: ...@@ -582,6 +593,10 @@ public:
* expense of reduced simulation performance. * expense of reduced simulation performance.
*/ */
void flushQueue(); void flushQueue();
/**
* Get the flags that should be used when creating hipEvent_t objects.
*/
unsigned int getEventFlags();
/** /**
* Get the flags that should be used when allocating pinned host memory. * Get the flags that should be used when allocating pinned host memory.
*/ */
...@@ -610,6 +625,7 @@ private: ...@@ -610,6 +625,7 @@ private:
std::map<std::string, std::string> compilationDefines; std::map<std::string, std::string> compilationDefines;
std::vector<hipModule_t> loadedModules; std::vector<hipModule_t> loadedModules;
hipDevice_t device; hipDevice_t device;
std::stack<hipDevice_t> outerScopeDevices;
hipStream_t currentStream; hipStream_t currentStream;
hipFunction_t clearBufferKernel; hipFunction_t clearBufferKernel;
hipFunction_t clearTwoBuffersKernel; hipFunction_t clearTwoBuffersKernel;
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. * * Portions copyright (c) 2008-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -109,6 +109,18 @@ public: ...@@ -109,6 +109,18 @@ public:
* @param context the context in which to execute this kernel * @param context the context in which to execute this kernel
*/ */
void setTime(ContextImpl& context, double time); void setTime(ContextImpl& context, double time);
/**
* Get the current step count
*
* @param context the context in which to execute this kernel
*/
long long getStepCount(const ContextImpl& context) const;
/**
* Set the current step count
*
* @param context the context in which to execute this kernel
*/
void setStepCount(const ContextImpl& context, long long count);
/** /**
* Get the positions of all particles. * Get the positions of all particles.
* *
...@@ -133,6 +145,15 @@ public: ...@@ -133,6 +145,15 @@ public:
* @param velocities a vector containg the particle velocities * @param velocities a vector containg the particle velocities
*/ */
void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities); void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities);
/**
* Compute velocities, shifted in time to account for a leapfrog integrator. The shift
* is based on the most recently computed forces.
*
* @param context the context in which to execute this kernel
* @param timeShift the amount by which to shift the velocities in time
* @param velocities the shifted velocities are returned in this
*/
void computeShiftedVelocities(ContextImpl& context, double timeShift, std::vector<Vec3>& velocities);
/** /**
* Get the current forces on all particles. * Get the current forces on all particles.
* *
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -339,7 +339,7 @@ private: ...@@ -339,7 +339,7 @@ private:
HipArray rebuildNeighborList; HipArray rebuildNeighborList;
HipSort* blockSorter; HipSort* blockSorter;
hipEvent_t downloadCountEvent; hipEvent_t downloadCountEvent;
int* pinnedCountBuffer; unsigned int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs; std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
...@@ -349,8 +349,9 @@ private: ...@@ -349,8 +349,9 @@ private:
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList; bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, numTilesInBatch, maxExclusions; int startTileIndex, startBlockIndex, numBlocks, numTilesInBatch, maxExclusions;
int numForceThreadBlocks, forceThreadBlockSize, findInteractingBlocksThreadBlockSize, numAtoms, groupFlags; int numForceThreadBlocks, forceThreadBlockSize, findInteractingBlocksThreadBlockSize, numAtoms, groupFlags;
unsigned int maxTiles, maxSinglePairs, tilesAfterReorder;
long long numTiles; long long numTiles;
std::string kernelSource; std::string kernelSource;
}; };
......
...@@ -91,7 +91,9 @@ private: ...@@ -91,7 +91,9 @@ private:
long long* pinnedForceBuffer; long long* pinnedForceBuffer;
hipFunction_t sumKernel; hipFunction_t sumKernel;
hipEvent_t event; hipEvent_t event;
hipStream_t peerCopyStream; std::vector<hipEvent_t> peerCopyEvent;
std::vector<hipEvent_t> peerCopyEventLocal;
std::vector<hipStream_t> peerCopyStream;
}; };
/** /**
......
...@@ -126,8 +126,8 @@ public: ...@@ -126,8 +126,8 @@ public:
std::vector<HipContext*> contexts; std::vector<HipContext*> contexts;
std::vector<double> contextEnergy; std::vector<double> contextEnergy;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces; bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces;
int cmMotionFrequency; int cmMotionFrequency, computeForceCount;
int stepCount, computeForceCount; long long stepCount;
double time; double time;
std::map<std::string, std::string> propertyValues; std::map<std::string, std::string> propertyValues;
ThreadPool threads; ThreadPool threads;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2012-2019 Stanford University and the Authors. * * Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipArray.h" #include "HipArray.h"
#include "HipContext.h" #include "HipContext.h"
#include "openmm/common/ContextSelector.h"
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
...@@ -36,13 +37,13 @@ using namespace OpenMM; ...@@ -36,13 +37,13 @@ using namespace OpenMM;
HipArray::HipArray() : pointer(0), ownsMemory(false) { HipArray::HipArray() : pointer(0), ownsMemory(false) {
} }
HipArray::HipArray(HipContext& context, int size, int elementSize, const std::string& name) : pointer(0) { HipArray::HipArray(HipContext& context, size_t size, int elementSize, const std::string& name) : pointer(0) {
initialize(context, size, elementSize, name); initialize(context, size, elementSize, name);
} }
HipArray::~HipArray() { HipArray::~HipArray() {
if (pointer != 0 && ownsMemory && context->getContextIsValid()) { if (pointer != 0 && ownsMemory && context->getContextIsValid()) {
context->setAsCurrent(); ContextSelector selector(*context);
hipError_t result = hipFree(pointer); hipError_t result = hipFree(pointer);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
...@@ -52,7 +53,7 @@ HipArray::~HipArray() { ...@@ -52,7 +53,7 @@ HipArray::~HipArray() {
} }
} }
void HipArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) { void HipArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (this->pointer != 0) if (this->pointer != 0)
throw OpenMMException("HipArray has already been initialized"); throw OpenMMException("HipArray has already been initialized");
this->context = &dynamic_cast<HipContext&>(context); this->context = &dynamic_cast<HipContext&>(context);
...@@ -60,6 +61,7 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co ...@@ -60,6 +61,7 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
this->elementSize = elementSize; this->elementSize = elementSize;
this->name = name; this->name = name;
ownsMemory = true; ownsMemory = true;
ContextSelector selector(*this->context);
hipError_t result = hipMalloc(&pointer, size*elementSize); hipError_t result = hipMalloc(&pointer, size*elementSize);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
...@@ -68,11 +70,12 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co ...@@ -68,11 +70,12 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
} }
} }
void HipArray::resize(int size) { void HipArray::resize(size_t size) {
if (pointer == 0) if (pointer == 0)
throw OpenMMException("HipArray has not been initialized"); throw OpenMMException("HipArray has not been initialized");
if (!ownsMemory) if (!ownsMemory)
throw OpenMMException("Cannot resize an array that does not own its storage"); throw OpenMMException("Cannot resize an array that does not own its storage");
ContextSelector selector(*context);
hipError_t result = hipFree(pointer); hipError_t result = hipFree(pointer);
if (result != hipSuccess) { if (result != hipSuccess) {
std::stringstream str; std::stringstream str;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2023 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2023 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "HipProgram.h" #include "HipProgram.h"
#include "HipFFT3D.h" #include "HipFFT3D.h"
#include "openmm/common/ComputeArray.h" #include "openmm/common/ComputeArray.h"
#include "openmm/common/ContextSelector.h"
#include "SHA1.h" #include "SHA1.h"
#include "openmm/Platform.h" #include "openmm/Platform.h"
#include "openmm/System.h" #include "openmm/System.h"
...@@ -184,13 +185,15 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy ...@@ -184,13 +185,15 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
} }
contextIsValid = true; contextIsValid = true;
ContextSelector selector(*this);
if (contextIndex > 0) { if (contextIndex > 0) {
int canAccess; int canAccess;
CHECK_RESULT(hipDeviceCanAccessPeer(&canAccess, getDevice(), platformData.contexts[0]->getDevice())); CHECK_RESULT(hipDeviceCanAccessPeer(&canAccess, getDevice(), platformData.contexts[0]->getDevice()));
if (canAccess) { if (canAccess) {
platformData.contexts[0]->setAsCurrent(); {
ContextSelector selector2(*platformData.contexts[0]);
CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0)); CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0));
setAsCurrent(); }
CHECK_RESULT(hipDeviceEnablePeerAccess(platformData.contexts[0]->getDevice(), 0)); CHECK_RESULT(hipDeviceEnablePeerAccess(platformData.contexts[0]->getDevice(), 0));
} }
} }
...@@ -345,7 +348,7 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy ...@@ -345,7 +348,7 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
} }
HipContext::~HipContext() { HipContext::~HipContext() {
setAsCurrent(); pushAsCurrent();
for (auto force : forces) for (auto force : forces)
delete force; delete force;
for (auto listener : reorderListeners) for (auto listener : reorderListeners)
...@@ -366,28 +369,29 @@ HipContext::~HipContext() { ...@@ -366,28 +369,29 @@ HipContext::~HipContext() {
delete nonbonded; delete nonbonded;
for (auto module : loadedModules) for (auto module : loadedModules)
hipModuleUnload(module); hipModuleUnload(module);
popAsCurrent();
contextIsValid = false; contextIsValid = false;
} }
void HipContext::initialize() { void HipContext::initialize() {
hipSetDevice(device); ContextSelector selector(*this);
string errorMessage = "Error initializing Context"; string errorMessage = "Error initializing Context";
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers()); int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) { if (useDoublePrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum"); energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
} }
else if (useMixedPrecision) { else if (useMixedPrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum"); energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
} }
else { else {
energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer"); energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<float>(*this, 1, "energySum"); energySum.initialize<float>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers); int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags())); CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags()));
} }
...@@ -423,6 +427,29 @@ void HipContext::setAsCurrent() { ...@@ -423,6 +427,29 @@ void HipContext::setAsCurrent() {
hipSetDevice(device); hipSetDevice(device);
} }
void HipContext::pushAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPushCurrent's behavior
hipDevice_t outerScopeDevice;
hipGetDevice(&outerScopeDevice);
outerScopeDevices.push(outerScopeDevice);
if (device != outerScopeDevice) {
hipSetDevice(device);
}
}
}
void HipContext::popAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPopCurrent's behavior
hipDevice_t outerScopeDevice = outerScopeDevices.top();
outerScopeDevices.pop();
if (outerScopeDevice != device) {
hipSetDevice(outerScopeDevice);
}
}
}
string HipContext::getTempFileName() const { string HipContext::getTempFileName() const {
stringstream tempFileName; stringstream tempFileName;
tempFileName << tempDir; tempFileName << tempDir;
...@@ -784,12 +811,18 @@ double HipContext::reduceEnergy() { ...@@ -784,12 +811,18 @@ double HipContext::reduceEnergy() {
int bufferSize = energyBuffer.getSize(); int bufferSize = energyBuffer.getSize();
int workGroupSize = getMaxThreadBlockSize(); int workGroupSize = getMaxThreadBlockSize();
void* args[] = {&energyBuffer.getDevicePointer(), &energySum.getDevicePointer(), &bufferSize, &workGroupSize}; void* args[] = {&energyBuffer.getDevicePointer(), &energySum.getDevicePointer(), &bufferSize, &workGroupSize};
executeKernel(reduceEnergyKernel, args, workGroupSize, workGroupSize, workGroupSize*energyBuffer.getElementSize()); executeKernel(reduceEnergyKernel, args, workGroupSize*energySum.getSize(), workGroupSize, workGroupSize*energyBuffer.getElementSize());
energySum.download(pinnedBuffer); energySum.download(pinnedBuffer);
if (getUseDoublePrecision() || getUseMixedPrecision()) double result = 0;
return *((double*) pinnedBuffer); if (getUseDoublePrecision() || getUseMixedPrecision()) {
else for (int i = 0; i < energySum.getSize(); i++)
return *((float*) pinnedBuffer); result += ((double*) pinnedBuffer)[i];
}
else {
for (int i = 0; i < energySum.getSize(); i++)
result += ((float*) pinnedBuffer)[i];
}
return result;
} }
void HipContext::setCharges(const vector<double>& charges) { void HipContext::setCharges(const vector<double>& charges) {
...@@ -850,6 +883,13 @@ vector<int> HipContext::getDevicePrecedence() { ...@@ -850,6 +883,13 @@ vector<int> HipContext::getDevicePrecedence() {
return precedence; return precedence;
} }
unsigned int HipContext::getEventFlags() {
unsigned int flags = hipEventDisableTiming;
if (useBlockingSync)
flags += hipEventBlockingSync;
return flags;
}
unsigned int HipContext::getHostMallocFlags() { unsigned int HipContext::getHostMallocFlags() {
#ifdef WIN32 #ifdef WIN32
return hipHostMallocDefault; return hipHostMallocDefault;
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
using namespace OpenMM; using namespace OpenMM;
HipEvent::HipEvent(HipContext& context) : context(context), eventCreated(false) { HipEvent::HipEvent(HipContext& context) : context(context), eventCreated(false) {
hipError_t result = hipEventCreateWithFlags(&event, hipEventDisableTiming); hipError_t result = hipEventCreateWithFlags(&event, context.getEventFlags());
if (result != hipSuccess) if (result != hipSuccess)
throw OpenMMException("Error creating HIP event:"+HipContext::getErrorString(result)); throw OpenMMException("Error creating HIP event:"+HipContext::getErrorString(result));
eventCreated = true; eventCreated = true;
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. * * Portions copyright (c) 2009-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipIntegrationUtilities.h" #include "HipIntegrationUtilities.h"
#include "HipContext.h" #include "HipContext.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -41,13 +42,13 @@ using namespace std; ...@@ -41,13 +42,13 @@ using namespace std;
HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system), HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) { ccmaConvergedMemory(NULL) {
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, hipEventDisableTiming), "Error creating event for CCMA"); CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, context.getEventFlags()), "Error creating event for CCMA");
CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory"); CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory");
CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory"); CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
} }
HipIntegrationUtilities::~HipIntegrationUtilities() { HipIntegrationUtilities::~HipIntegrationUtilities() {
context.setAsCurrent(); ContextSelector selector(context);
if (ccmaConvergedMemory != NULL) { if (ccmaConvergedMemory != NULL) {
hipHostFree(ccmaConvergedMemory); hipHostFree(ccmaConvergedMemory);
hipEventDestroy(ccmaEvent); hipEventDestroy(ccmaEvent);
...@@ -67,6 +68,7 @@ HipArray& HipIntegrationUtilities::getStepSize() { ...@@ -67,6 +68,7 @@ HipArray& HipIntegrationUtilities::getStepSize() {
} }
void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) { void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) {
ContextSelector selector(context);
ComputeKernel settleKernel, shakeKernel, ccmaForceKernel; ComputeKernel settleKernel, shakeKernel, ccmaForceKernel;
if (constrainVelocities) { if (constrainVelocities) {
settleKernel = settleVelKernel; settleKernel = settleVelKernel;
...@@ -132,6 +134,7 @@ void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, dou ...@@ -132,6 +134,7 @@ void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, dou
} }
void HipIntegrationUtilities::distributeForcesFromVirtualSites() { void HipIntegrationUtilities::distributeForcesFromVirtualSites() {
ContextSelector selector(context);
if (numVsites > 0) { if (numVsites > 0) {
vsiteForceKernel->setArg(2, context.getLongForceBuffer()); vsiteForceKernel->setArg(2, context.getLongForceBuffer());
vsiteForceKernel->execute(numVsites); vsiteForceKernel->execute(numVsites);
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. * * Portions copyright (c) 2008-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "openmm/Context.h" #include "openmm/Context.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/NonbondedForceImpl.h" #include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/common/ContextSelector.h"
#include "CommonKernelSources.h" #include "CommonKernelSources.h"
#include "HipBondedUtilities.h" #include "HipBondedUtilities.h"
#include "HipExpressionUtilities.h" #include "HipExpressionUtilities.h"
...@@ -59,7 +60,7 @@ void HipCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -59,7 +60,7 @@ void HipCalcForcesAndEnergyKernel::initialize(const System& system) {
void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) { void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cu.setForcesValid(true); cu.setForcesValid(true);
cu.setAsCurrent(); ContextSelector selector(cu);
cu.clearAutoclearBuffers(); cu.clearAutoclearBuffers();
for (auto computation : cu.getPreComputations()) for (auto computation : cu.getPreComputations())
computation->computeForceAndEnergy(includeForces, includeEnergy, groups); computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
...@@ -72,7 +73,7 @@ void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -72,7 +73,7 @@ void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
} }
double HipCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) { double HipCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.setAsCurrent(); ContextSelector selector(cu);
cu.getBondedUtilities().computeInteractions(groups); cu.getBondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy); cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0; double sum = 0.0;
...@@ -99,8 +100,18 @@ void HipUpdateStateDataKernel::setTime(ContextImpl& context, double time) { ...@@ -99,8 +100,18 @@ void HipUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
ctx->setTime(time); ctx->setTime(time);
} }
long long HipUpdateStateDataKernel::getStepCount(const ContextImpl& context) const {
return cu.getStepCount();
}
void HipUpdateStateDataKernel::setStepCount(const ContextImpl& context, long long count) {
vector<HipContext*>& contexts = cu.getPlatformData().contexts;
for (auto ctx : contexts)
ctx->setStepCount(count);
}
void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) { void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
cu.setAsCurrent(); ContextSelector selector(cu);
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles); positions.resize(numParticles);
vector<float4> posCorrection; vector<float4> posCorrection;
...@@ -161,7 +172,7 @@ void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& ...@@ -161,7 +172,7 @@ void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>&
} }
void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) { void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) {
cu.setAsCurrent(); ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) { if (cu.getUseDoublePrecision()) {
...@@ -212,7 +223,7 @@ void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<V ...@@ -212,7 +223,7 @@ void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<V
} }
void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) { void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
cu.setAsCurrent(); ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles); velocities.resize(numParticles);
...@@ -237,7 +248,7 @@ void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& ...@@ -237,7 +248,7 @@ void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>&
} }
void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) { void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) {
cu.setAsCurrent(); ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles(); int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) { if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
...@@ -270,8 +281,12 @@ void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector< ...@@ -270,8 +281,12 @@ void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<
} }
} }
void HipUpdateStateDataKernel::computeShiftedVelocities(ContextImpl& context, double timeShift, vector<Vec3>& velocities) {
cu.getIntegrationUtilities().computeShiftedVelocities(timeShift, velocities);
}
void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) { void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) {
cu.setAsCurrent(); ContextSelector selector(cu);
long long* force = (long long*) cu.getPinnedBuffer(); long long* force = (long long*) cu.getPinnedBuffer();
cu.getForce().download(force); cu.getForce().download(force);
const vector<int>& order = cu.getAtomIndex(); const vector<int>& order = cu.getAtomIndex();
...@@ -284,6 +299,7 @@ void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& for ...@@ -284,6 +299,7 @@ void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& for
} }
void HipUpdateStateDataKernel::getEnergyParameterDerivatives(ContextImpl& context, map<string, double>& derivs) { void HipUpdateStateDataKernel::getEnergyParameterDerivatives(ContextImpl& context, map<string, double>& derivs) {
ContextSelector selector(cu);
const vector<string>& paramDerivNames = cu.getEnergyParamDerivNames(); const vector<string>& paramDerivNames = cu.getEnergyParamDerivNames();
int numDerivs = paramDerivNames.size(); int numDerivs = paramDerivNames.size();
if (numDerivs == 0) if (numDerivs == 0)
...@@ -337,15 +353,15 @@ void HipUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const ...@@ -337,15 +353,15 @@ void HipUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const
} }
void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) { void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cu.setAsCurrent(); ContextSelector selector(cu);
int version = 3; int version = 3;
stream.write((char*) &version, sizeof(int)); stream.write((char*) &version, sizeof(int));
int precision = (cu.getUseDoublePrecision() ? 2 : cu.getUseMixedPrecision() ? 1 : 0); int precision = (cu.getUseDoublePrecision() ? 2 : cu.getUseMixedPrecision() ? 1 : 0);
stream.write((char*) &precision, sizeof(int)); stream.write((char*) &precision, sizeof(int));
double time = cu.getTime(); double time = cu.getTime();
stream.write((char*) &time, sizeof(double)); stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount(); long long stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int)); stream.write((char*) &stepCount, sizeof(long long));
int stepsSinceReorder = cu.getStepsSinceReorder(); int stepsSinceReorder = cu.getStepsSinceReorder();
stream.write((char*) &stepsSinceReorder, sizeof(int)); stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cu.getPinnedBuffer(); char* buffer = (char*) cu.getPinnedBuffer();
...@@ -367,7 +383,7 @@ void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& s ...@@ -367,7 +383,7 @@ void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& s
} }
void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) { void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cu.setAsCurrent(); ContextSelector selector(cu);
int version; int version;
stream.read((char*) &version, sizeof(int)); stream.read((char*) &version, sizeof(int));
if (version != 3) if (version != 3)
...@@ -379,8 +395,9 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str ...@@ -379,8 +395,9 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str
throw OpenMMException("Checkpoint was created with a different numeric precision"); throw OpenMMException("Checkpoint was created with a different numeric precision");
double time; double time;
stream.read((char*) &time, sizeof(double)); stream.read((char*) &time, sizeof(double));
int stepCount, stepsSinceReorder; long long stepCount;
stream.read((char*) &stepCount, sizeof(int)); stream.read((char*) &stepCount, sizeof(long long));
int stepsSinceReorder;
stream.read((char*) &stepsSinceReorder, sizeof(int)); stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<HipContext*>& contexts = cu.getPlatformData().contexts; vector<HipContext*>& contexts = cu.getPlatformData().contexts;
for (auto ctx : contexts) { for (auto ctx : contexts) {
...@@ -408,6 +425,7 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str ...@@ -408,6 +425,7 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str
SimTKOpenMMUtilities::loadCheckpoint(stream); SimTKOpenMMUtilities::loadCheckpoint(stream);
for (auto listener : cu.getReorderListeners()) for (auto listener : cu.getReorderListeners())
listener->execute(); listener->execute();
cu.validateAtomOrder();
} }
class HipCalcNonbondedForceKernel::ForceInfo : public HipForceInfo { class HipCalcNonbondedForceKernel::ForceInfo : public HipForceInfo {
...@@ -448,7 +466,7 @@ public: ...@@ -448,7 +466,7 @@ public:
forceTemp.initialize<float4>(cu, cu.getNumAtoms(), "PmeForce"); forceTemp.initialize<float4>(cu, cu.getNumAtoms(), "PmeForce");
} }
float* getPosq() { float* getPosq() {
cu.setAsCurrent(); ContextSelector selector(cu);
cu.getPosq().download(posq); cu.getPosq().download(posq);
return (float*) &posq[0]; return (float*) &posq[0];
} }
...@@ -532,7 +550,7 @@ private: ...@@ -532,7 +550,7 @@ private:
}; };
HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() { HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() {
cu.setAsCurrent(); ContextSelector selector(cu);
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -551,7 +569,7 @@ HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() { ...@@ -551,7 +569,7 @@ HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() {
} }
void HipCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) { void HipCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
cu.setAsCurrent(); ContextSelector selector(cu);
int forceIndex; int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex) for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
; ;
...@@ -650,8 +668,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -650,8 +668,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0); hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets) if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1"; paramsDefines["HAS_OFFSETS"] = "1";
if (force.getNumParticleParameterOffsets() > 0)
paramsDefines["HAS_PARTICLE_OFFSETS"] = "1";
if (force.getNumExceptionParameterOffsets() > 0)
paramsDefines["HAS_EXCEPTION_OFFSETS"] = "1";
if (usePosqCharges) if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1"; paramsDefines["USE_POSQ_CHARGES"] = "1";
if (doLJPME)
paramsDefines["INCLUDE_LJPME_EXCEPTIONS"] = "1";
if (nonbondedMethod == Ewald) { if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters. // Compute the Ewald parameters.
...@@ -703,8 +727,16 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -703,8 +727,16 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0"; defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME) if (doLJPME) {
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha); defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
}
if (cu.getContextIndex() == 0) { if (cu.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1"; paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI)); paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
...@@ -766,13 +798,6 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -766,13 +798,6 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha)); pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1"; pmeDefines["USE_LJPME"] = "1";
pmeDefines["CHARGE_FROM_SIGEPS"] = "1"; pmeDefines["CHARGE_FROM_SIGEPS"] = "1";
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
module = cu.createModule(HipKernelSources::vectorOps+CommonKernelSources::pme, pmeDefines); module = cu.createModule(HipKernelSources::vectorOps+CommonKernelSources::pme, pmeDefines);
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge"); pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomGridIndex"); pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
...@@ -813,8 +838,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -813,8 +838,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
if (usePmeStream) { if (usePmeStream) {
CHECK_RESULT(hipStreamCreateWithFlags(&pmeStream, hipStreamNonBlocking), "Error creating stream for NonbondedForce"); CHECK_RESULT(hipStreamCreateWithFlags(&pmeStream, hipStreamNonBlocking), "Error creating stream for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&pmeSyncEvent, hipEventDisableTiming), "Error creating event for NonbondedForce"); CHECK_RESULT(hipEventCreateWithFlags(&pmeSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&paramsSyncEvent, hipEventDisableTiming), "Error creating event for NonbondedForce"); CHECK_RESULT(hipEventCreateWithFlags(&paramsSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
int recipForceGroup = force.getReciprocalSpaceForceGroup(); int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0) if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup(); recipForceGroup = force.getForceGroup();
...@@ -939,6 +964,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -939,6 +964,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0"; replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0";
if (doLJPME) if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha); replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
if (force.getIncludeDirectSpace())
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup());
} }
} }
...@@ -959,7 +985,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -959,7 +985,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
replacements["CHARGE1"] = prefix+"charge1"; replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2"; replacements["CHARGE2"] = prefix+"charge2";
} }
if (hasCoulomb) if (hasCoulomb && !usePosqCharges)
cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer())); cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon"); sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) { if (hasLJ) {
...@@ -968,6 +994,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -968,6 +994,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer())); cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
} }
source = cu.replaceStrings(source, replacements); source = cu.replaceStrings(source, replacements);
if (force.getIncludeDirectSpace())
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true); cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
// Initialize the exceptions. // Initialize the exceptions.
...@@ -993,13 +1020,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -993,13 +1020,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
map<string, string> replacements; map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0"); replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4"); replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
if (force.getIncludeDirectSpace())
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup()); cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
} }
// Initialize parameter offsets. // Initialize parameter offsets.
vector<vector<float4> > particleOffsetVec(force.getNumParticles()); vector<vector<float4> > particleOffsetVec(force.getNumParticles());
vector<vector<float4> > exceptionOffsetVec(force.getNumExceptions()); vector<vector<float4> > exceptionOffsetVec(numExceptions);
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) { for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param; string param;
int particle; int particle;
...@@ -1020,6 +1048,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -1020,6 +1048,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
int exception; int exception;
double charge, sigma, epsilon; double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon); force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
int index = exceptionIndex[exception];
if (index < startIndex || index >= endIndex)
continue;
auto paramPos = find(paramNames.begin(), paramNames.end(), param); auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex; int paramIndex;
if (paramPos == paramNames.end()) { if (paramPos == paramNames.end()) {
...@@ -1028,13 +1059,11 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -1028,13 +1059,11 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
} }
else else
paramIndex = paramPos-paramNames.begin(); paramIndex = paramPos-paramNames.begin();
exceptionOffsetVec[exceptionIndex[exception]].push_back(make_float4(charge, sigma, epsilon, paramIndex)); exceptionOffsetVec[index-startIndex].push_back(make_float4(charge, sigma, epsilon, paramIndex));
} }
paramValues.resize(paramNames.size(), 0.0); paramValues.resize(paramNames.size(), 0.0);
particleParamOffsets.initialize<float4>(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets"); particleParamOffsets.initialize<float4>(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
exceptionParamOffsets.initialize<float4>(cu, max(force.getNumExceptionParameterOffsets(), 1), "exceptionParamOffsets");
particleOffsetIndices.initialize<int>(cu, cu.getPaddedNumAtoms()+1, "particleOffsetIndices"); particleOffsetIndices.initialize<int>(cu, cu.getPaddedNumAtoms()+1, "particleOffsetIndices");
exceptionOffsetIndices.initialize<int>(cu, force.getNumExceptions()+1, "exceptionOffsetIndices");
vector<int> particleOffsetIndicesVec, exceptionOffsetIndicesVec; vector<int> particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector<float4> p, e; vector<float4> p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) { for (int i = 0; i < particleOffsetVec.size(); i++) {
...@@ -1054,7 +1083,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -1054,7 +1083,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
particleParamOffsets.upload(p); particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec); particleOffsetIndices.upload(particleOffsetIndicesVec);
} }
if (force.getNumExceptionParameterOffsets() > 0) { exceptionParamOffsets.initialize<float4>(cu, max((int) e.size(), 1), "exceptionParamOffsets");
exceptionOffsetIndices.initialize<int>(cu, exceptionOffsetIndicesVec.size(), "exceptionOffsetIndices");
if (e.size() > 0) {
exceptionParamOffsets.upload(e); exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec); exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
} }
...@@ -1075,6 +1106,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -1075,6 +1106,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// Update particle and exception parameters. // Update particle and exception parameters.
ContextSelector selector(cu);
bool paramChanged = false; bool paramChanged = false;
for (int i = 0; i < paramNames.size(); i++) { for (int i = 0; i < paramNames.size(); i++) {
double value = context.getParameter(paramNames[i]); double value = context.getParameter(paramNames[i]);
...@@ -1089,7 +1121,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -1089,7 +1121,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
} }
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0); double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (recomputeParams || hasOffsets) { if (recomputeParams || hasOffsets) {
bool computeSelfEnergy = (includeEnergy && includeReciprocal); int computeSelfEnergy = (includeEnergy && includeReciprocal);
int numAtoms = cu.getPaddedNumAtoms(); int numAtoms = cu.getPaddedNumAtoms();
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms, vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(), &baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
...@@ -1258,7 +1290,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -1258,7 +1290,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) { void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
// Make sure the new parameters are acceptable. // Make sure the new parameters are acceptable.
cu.setAsCurrent(); ContextSelector selector(cu);
if (force.getNumParticles() != cu.getNumAtoms()) if (force.getNumParticles() != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed"); throw OpenMMException("updateParametersInContext: The number of particles has changed");
if (!hasCoulomb || !hasLJ) { if (!hasCoulomb || !hasLJ) {
...@@ -1271,20 +1303,28 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1271,20 +1303,28 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0"); throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0");
} }
} }
set<int> exceptionsWithOffsets;
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
exceptionsWithOffsets.insert(exception);
}
vector<int> exceptions; vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) { for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2; int particle1, particle2;
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon); force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()]) if (chargeProd != 0.0 || epsilon != 0.0 || exceptionsWithOffsets.find(i) != exceptionsWithOffsets.end())
exceptions.push_back(i); exceptions.push_back(i);
else if (chargeProd != 0.0 || epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
} }
int numContexts = cu.getPlatformData().contexts.size(); int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts; int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts; int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex; int numExceptions = endIndex-startIndex;
if (numExceptions != exceptionAtoms.size())
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
// Record the per-particle parameters. // Record the per-particle parameters.
...@@ -1300,11 +1340,13 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1300,11 +1340,13 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the exceptions. // Record the exceptions.
if (numExceptions > 0) { if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<float4> baseExceptionParamsVec(numExceptions); vector<float4> baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) { for (int i = 0; i < numExceptions; i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon; double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon); force.getExceptionParameters(exceptions[startIndex+i], particle1, particle2, chargeProd, sigma, epsilon);
if (make_pair(particle1, particle2) != exceptionAtoms[i])
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0); baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0);
} }
baseExceptionParams.upload(baseExceptionParamsVec); baseExceptionParams.upload(baseExceptionParamsVec);
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. * * Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -439,6 +439,10 @@ void HipNonbondedUtilities::computeInteractions(int forceGroups, bool includeFor ...@@ -439,6 +439,10 @@ void HipNonbondedUtilities::computeInteractions(int forceGroups, bool includeFor
bool HipNonbondedUtilities::updateNeighborListSize() { bool HipNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return false; return false;
if (context.getStepsSinceReorder() == 0)
tilesAfterReorder = pinnedCountBuffer[0];
else if (context.getStepsSinceReorder() > 25 && pinnedCountBuffer[0] > 1.1*tilesAfterReorder)
context.forceReorder();
if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs) if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs)
return false; return false;
...@@ -446,12 +450,13 @@ bool HipNonbondedUtilities::updateNeighborListSize() { ...@@ -446,12 +450,13 @@ bool HipNonbondedUtilities::updateNeighborListSize() {
// this from happening in the future. // this from happening in the future.
if (pinnedCountBuffer[0] > maxTiles) { if (pinnedCountBuffer[0] > maxTiles) {
maxTiles = (int) (1.2*pinnedCountBuffer[0]); maxTiles = (unsigned int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
maxTiles = totalTiles; maxTiles = totalTiles;
interactingTiles.resize(maxTiles); interactingTiles.resize(maxTiles);
interactingAtoms.resize(HipContext::TileSize*maxTiles); interactingAtoms.resize(HipContext::TileSize*(size_t) maxTiles);
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles.getDevicePointer(); forceArgs[7] = &interactingTiles.getDevicePointer();
findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer(); findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer();
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2019 Stanford University and the Authors. * * Portions copyright (c) 2011-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "HipParallelKernels.h" #include "HipParallelKernels.h"
#include "HipKernelSources.h" #include "HipKernelSources.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -70,7 +71,7 @@ public: ...@@ -70,7 +71,7 @@ public:
void execute() { void execute() {
// Copy coordinates over to this device and execute the kernel. // Copy coordinates over to this device and execute the kernel.
cu.setAsCurrent(); ContextSelector selector(cu);
if (cu.getContextIndex() > 0) { if (cu.getContextIndex() > 0) {
hipStreamWaitEvent(cu.getCurrentStream(), event, 0); hipStreamWaitEvent(cu.getCurrentStream(), event, 0);
if (!cu.getPlatformData().peerAccessSupported) if (!cu.getPlatformData().peerAccessSupported)
...@@ -94,13 +95,16 @@ private: ...@@ -94,13 +95,16 @@ private:
class HipParallelCalcForcesAndEnergyKernel::FinishComputationTask : public HipContext::WorkTask { class HipParallelCalcForcesAndEnergyKernel::FinishComputationTask : public HipContext::WorkTask {
public: public:
FinishComputationTask(ContextImpl& context, HipContext& cu, HipCalcForcesAndEnergyKernel& kernel, FinishComputationTask(ContextImpl& context, HipContext& cu, HipCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces, bool& valid, int2& interactionCount) : bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces,
bool& valid, int2& interactionCount, hipStream_t stream, hipEvent_t event, hipEvent_t localEvent) :
context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy), context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount) { completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount),
stream(stream), event(event), localEvent(localEvent) {
} }
void execute() { void execute() {
// Execute the kernel, then download forces. // Execute the kernel, then download forces.
ContextSelector selector(cu);
energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid); energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid);
if (cu.getComputeForceCount() < 200) { if (cu.getComputeForceCount() < 200) {
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation. // Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
...@@ -110,13 +114,16 @@ public: ...@@ -110,13 +114,16 @@ public:
} }
if (includeForce) { if (includeForce) {
if (cu.getContextIndex() > 0) { if (cu.getContextIndex() > 0) {
hipEventRecord(localEvent, cu.getCurrentStream());
hipStreamWaitEvent(stream, localEvent, 0);
int numAtoms = cu.getPaddedNumAtoms(); int numAtoms = cu.getPaddedNumAtoms();
if (cu.getPlatformData().peerAccessSupported) { if (cu.getPlatformData().peerAccessSupported) {
int numBytes = numAtoms*3*sizeof(long long); int numBytes = numAtoms*3*sizeof(long long);
int offset = (cu.getContextIndex()-1)*numBytes; int offset = (cu.getContextIndex()-1)*numBytes;
HipContext& context0 = *cu.getPlatformData().contexts[0]; HipContext& context0 = *cu.getPlatformData().contexts[0];
CHECK_RESULT(hipMemcpy(static_cast<char*>(contextForces.getDevicePointer())+offset, CHECK_RESULT(hipMemcpyAsync(static_cast<char*>(contextForces.getDevicePointer())+offset,
cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice), "Error copying forces"); cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice, stream), "Error copying forces");
hipEventRecord(event, stream);
} }
else else
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]); cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
...@@ -140,6 +147,9 @@ private: ...@@ -140,6 +147,9 @@ private:
HipArray& contextForces; HipArray& contextForces;
bool& valid; bool& valid;
int2& interactionCount; int2& interactionCount;
hipStream_t stream;
hipEvent_t event;
hipEvent_t localEvent;
}; };
HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, HipPlatform::PlatformData& data) : HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, HipPlatform::PlatformData& data) :
...@@ -150,20 +160,25 @@ HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(strin ...@@ -150,20 +160,25 @@ HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(strin
} }
HipParallelCalcForcesAndEnergyKernel::~HipParallelCalcForcesAndEnergyKernel() { HipParallelCalcForcesAndEnergyKernel::~HipParallelCalcForcesAndEnergyKernel() {
data.contexts[0]->setAsCurrent(); ContextSelector selector(*data.contexts[0]);
if (pinnedPositionBuffer != NULL) if (pinnedPositionBuffer != NULL)
hipHostFree(pinnedPositionBuffer); hipHostFree(pinnedPositionBuffer);
if (pinnedForceBuffer != NULL) if (pinnedForceBuffer != NULL)
hipHostFree(pinnedForceBuffer); hipHostFree(pinnedForceBuffer);
hipEventDestroy(event); hipEventDestroy(event);
hipStreamDestroy(peerCopyStream); for (int i = 0; i < peerCopyEvent.size(); i++)
hipEventDestroy(peerCopyEvent[i]);
for (int i = 0; i < peerCopyEventLocal.size(); i++)
hipEventDestroy(peerCopyEventLocal[i]);
for (int i = 0; i < peerCopyStream.size(); i++)
hipStreamDestroy(peerCopyStream[i]);
if (interactionCounts != NULL) if (interactionCounts != NULL)
hipHostFree(interactionCounts); hipHostFree(interactionCounts);
} }
void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) { void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
HipContext& cu = *data.contexts[0]; HipContext& cu = *data.contexts[0];
cu.setAsCurrent(); ContextSelector selector(cu);
hipModule_t module = cu.createModule(HipKernelSources::parallel); hipModule_t module = cu.createModule(HipKernelSources::parallel);
sumKernel = cu.getKernel(module, "sumForces"); sumKernel = cu.getKernel(module, "sumForces");
int numContexts = data.contexts.size(); int numContexts = data.contexts.size();
...@@ -171,14 +186,25 @@ void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -171,14 +186,25 @@ void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
getKernel(i).initialize(system); getKernel(i).initialize(system);
for (int i = 0; i < numContexts; i++) for (int i = 0; i < numContexts; i++)
contextNonbondedFractions[i] = 1/(double) numContexts; contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(hipEventCreateWithFlags(&event, 0), "Error creating event"); CHECK_RESULT(hipEventCreateWithFlags(&event, cu.getEventFlags()), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream, hipStreamNonBlocking), "Error creating stream"); peerCopyEvent.resize(numContexts);
peerCopyEventLocal.resize(numContexts);
peerCopyStream.resize(numContexts);
for (int i = 0; i < numContexts; i++) {
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEvent[i], cu.getEventFlags()), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream[i], hipStreamNonBlocking), "Error creating stream");
}
for (int i = 0; i < numContexts; i++) {
HipContext& cuLocal = *data.contexts[i];
ContextSelector selectorLocal(cuLocal);
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEventLocal[i], cu.getEventFlags()), "Error creating event");
}
CHECK_RESULT(hipHostMalloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer"); CHECK_RESULT(hipHostMalloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer");
} }
void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
HipContext& cu = *data.contexts[0]; HipContext& cu = *data.contexts[0];
cu.setAsCurrent(); ContextSelector selector(cu);
if (!contextForces.isInitialized()) { if (!contextForces.isInitialized()) {
contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces"); contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
CHECK_RESULT(hipHostMalloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), hipHostMallocPortable), "Error allocating pinned memory"); CHECK_RESULT(hipHostMalloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), hipHostMallocPortable), "Error allocating pinned memory");
...@@ -194,36 +220,44 @@ void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context ...@@ -194,36 +220,44 @@ void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context
else { else {
int numBytes = cu.getPosq().getSize()*cu.getPosq().getElementSize(); int numBytes = cu.getPosq().getSize()*cu.getPosq().getElementSize();
hipEventRecord(event, cu.getCurrentStream()); hipEventRecord(event, cu.getCurrentStream());
hipStreamWaitEvent(peerCopyStream, event, 0); for (int i = 1; i < (int) data.contexts.size(); i++) {
for (int i = 1; i < (int) data.contexts.size(); i++) hipStreamWaitEvent(peerCopyStream[i], event, 0);
CHECK_RESULT(hipMemcpyAsync( CHECK_RESULT(hipMemcpyAsync(
data.contexts[i]->getPosq().getDevicePointer(), data.contexts[i]->getPosq().getDevicePointer(),
cu.getPosq().getDevicePointer(), numBytes, cu.getPosq().getDevicePointer(), numBytes,
hipMemcpyDeviceToDevice, peerCopyStream), "Error copying positions"); hipMemcpyDeviceToDevice, peerCopyStream[i]), "Error copying positions");
hipEventRecord(event, peerCopyStream); hipEventRecord(peerCopyEvent[i], peerCopyStream[i]);
}
} }
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0; data.contextEnergy[i] = 0.0;
HipContext& cu = *data.contexts[i]; HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread(); ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event, interactionCounts[i])); hipEvent_t waitEvent = (cu.getPlatformData().peerAccessSupported ? peerCopyEvent[i] : event);
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, waitEvent, interactionCounts[i]));
} }
data.syncContexts();
} }
double HipParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) { double HipParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
HipContext& cu = *data.contexts[i]; HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread(); ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, contextForces, valid, interactionCounts[i])); thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i],
pinnedForceBuffer, contextForces, valid, interactionCounts[i], peerCopyStream[i], peerCopyEvent[i], peerCopyEventLocal[i]));
} }
data.syncContexts(); data.syncContexts();
HipContext& cu = *data.contexts[0];
ContextSelector selector(cu);
if (cu.getPlatformData().peerAccessSupported)
for (int i = 1; i < data.contexts.size(); i++)
hipStreamWaitEvent(cu.getCurrentStream(), peerCopyEvent[i], 0);
double energy = 0.0; double energy = 0.0;
for (int i = 0; i < (int) data.contextEnergy.size(); i++) for (int i = 0; i < (int) data.contextEnergy.size(); i++)
energy += data.contextEnergy[i]; energy += data.contextEnergy[i];
if (includeForce && valid) { if (includeForce && valid) {
// Sum the forces from all devices. // Sum the forces from all devices.
HipContext& cu = *data.contexts[0];
if (!cu.getPlatformData().peerAccessSupported) if (!cu.getPlatformData().peerAccessSupported)
contextForces.upload(pinnedForceBuffer, false); contextForces.upload(pinnedForceBuffer, false);
int bufferSize = 3*cu.getPaddedNumAtoms(); int bufferSize = 3*cu.getPaddedNumAtoms();
......
...@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re ...@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
extern __shared__ mixed tempBuffer[]; extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x; const unsigned int thread = threadIdx.x;
mixed sum = 0; mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x) for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x)
sum += energyBuffer[index]; sum += energyBuffer[index];
tempBuffer[thread] = sum; tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) { for (int i = 1; i < workGroupSize; i *= 2) {
...@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re ...@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
tempBuffer[thread] += tempBuffer[thread+i]; tempBuffer[thread] += tempBuffer[thread+i];
} }
if (thread == 0) if (thread == 0)
*result = tempBuffer[0]; result[blockIdx.x] = tempBuffer[0];
} }
/** /**
......
...@@ -41,4 +41,6 @@ OpenMM::HipPlatform platform; ...@@ -41,4 +41,6 @@ OpenMM::HipPlatform platform;
void initializeTests(int argc, char* argv[]) { void initializeTests(int argc, char* argv[]) {
if (argc > 1) if (argc > 1)
platform.setPropertyDefaultValue("Precision", std::string(argv[1])); platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
if (argc > 2)
platform.setPropertyDefaultValue("DeviceIndex", std::string(argv[2]));
} }
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. * * Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
* * * *
...@@ -38,12 +38,16 @@ ...@@ -38,12 +38,16 @@
#include "HipArray.h" #include "HipArray.h"
#include "HipContext.h" #include "HipContext.h"
#include "HipFFT3D.h" #include "HipFFT3D.h"
#include "fftpack.h"
#include "sfmt/SFMT.h" #include "sfmt/SFMT.h"
#include "openmm/System.h" #include "openmm/System.h"
#include <complex>
#include <iostream> #include <iostream>
#include <cmath> #include <cmath>
#include <set> #include <set>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
...@@ -67,19 +71,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e ...@@ -67,19 +71,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
vector<Real2> original(xsize*ysize*zsize); vector<Real2> original(xsize*ysize*zsize);
vector<t_complex> reference(original.size()); vector<complex<double>> reference(original.size());
for (int i = 0; i < (int) original.size(); i++) { for (int i = 0; i < (int) original.size(); i++) {
Real2 value; Real2 value;
value.x = (float) genrand_real2(sfmt); value.x = (float) genrand_real2(sfmt);
value.y = (float) genrand_real2(sfmt); value.y = (float) genrand_real2(sfmt);
original[i] = value; original[i] = value;
reference[i] = t_complex(value.x, value.y); reference[i] = complex<double>(value.x, value.y);
} }
for (int i = 0; i < (int) reference.size(); i++) { for (int i = 0; i < (int) reference.size(); i++) {
if (realToComplex) if (realToComplex)
reference[i] = t_complex(i%2 == 0 ? original[i/2].x : original[i/2].y, 0); reference[i] = complex<double>(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
else else
reference[i] = t_complex(original[i].x, original[i].y); reference[i] = complex<double>(original[i].x, original[i].y);
} }
HipArray grid1(context, original.size(), sizeof(Real2), "grid1"); HipArray grid1(context, original.size(), sizeof(Real2), "grid1");
HipArray grid2(context, original.size(), sizeof(Real2), "grid2"); HipArray grid2(context, original.size(), sizeof(Real2), "grid2");
...@@ -91,19 +95,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e ...@@ -91,19 +95,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
fft.execFFT(true); fft.execFFT(true);
vector<Real2> result; vector<Real2> result;
grid2.download(result); grid2.download(result);
fftpack_t plan; vector<size_t> shape = {(size_t) xsize, (size_t) ysize, (size_t) zsize};
fftpack_init_3d(&plan, xsize, ysize, zsize); vector<size_t> axes = {0, 1, 2};
fftpack_exec_3d(plan, FFTPACK_FORWARD, &reference[0], &reference[0]); vector<ptrdiff_t> stride = {(ptrdiff_t) (ysize*zsize*sizeof(complex<double>)),
(ptrdiff_t) (zsize*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, reference.data(), reference.data(), 1.0);
int outputZSize = (realToComplex ? zsize/2+1 : zsize); int outputZSize = (realToComplex ? zsize/2+1 : zsize);
for (int x = 0; x < xsize; x++) for (int x = 0; x < xsize; x++)
for (int y = 0; y < ysize; y++) for (int y = 0; y < ysize; y++)
for (int z = 0; z < outputZSize; z++) { for (int z = 0; z < outputZSize; z++) {
int index1 = x*ysize*zsize + y*zsize + z; int index1 = x*ysize*zsize + y*zsize + z;
int index2 = x*ysize*outputZSize + y*outputZSize + z; int index2 = x*ysize*outputZSize + y*outputZSize + z;
ASSERT_EQUAL_TOL(reference[index1].re, result[index2].x, 1e-3 * eps); ASSERT_EQUAL_TOL(reference[index1].real(), result[index2].x, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].im, result[index2].y, 1e-3 * eps); ASSERT_EQUAL_TOL(reference[index1].imag(), result[index2].y, 1e-3 * eps);
} }
fftpack_destroy(plan);
// Perform a backward transform and see if we get the original values. // Perform a backward transform and see if we get the original values.
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. * * Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. * * Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis * * Authors: Peter Eastman, Nicholas Curtis *
* Contributors: * * Contributors: *
...@@ -50,12 +50,18 @@ void testParallelComputation(NonbondedForce::NonbondedMethod method) { ...@@ -50,12 +50,18 @@ void testParallelComputation(NonbondedForce::NonbondedMethod method) {
vector<Vec3> positions(numParticles); vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt)); positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt));
force->addGlobalParameter("scale", 0.5);
for (int i = 0; i < numParticles; ++i) for (int i = 0; i < numParticles; ++i)
for (int j = 0; j < i; ++j) { for (int j = 0; j < i; ++j) {
Vec3 delta = positions[i]-positions[j]; Vec3 delta = positions[i]-positions[j];
if (delta.dot(delta) < 0.1) if (delta.dot(delta) < 0.1) {
force->addException(i, j, 0, 1, 0); force->addException(i, j, 0, 1, 0);
} }
else if (delta.dot(delta) < 0.2) {
int index = force->addException(i, j, 0.5, 1, 1.0);
force->addExceptionParameterOffset("scale", index, 0.5, 0.4, 0.3);
}
}
// Create two contexts, one with a single device and one with two devices. // Create two contexts, one with a single device and one with two devices.
...@@ -179,6 +185,7 @@ void runPlatformTests() { ...@@ -179,6 +185,7 @@ void runPlatformTests() {
testParallelComputation(NonbondedForce::NoCutoff); testParallelComputation(NonbondedForce::NoCutoff);
testParallelComputation(NonbondedForce::Ewald); testParallelComputation(NonbondedForce::Ewald);
testParallelComputation(NonbondedForce::PME); testParallelComputation(NonbondedForce::PME);
testParallelComputation(NonbondedForce::LJPME);
testReordering(); testReordering();
testDeterministicForces(); testDeterministicForces();
if (canRunHugeTest()) if (canRunHugeTest())
......
...@@ -60,6 +60,7 @@ void testGaussian() { ...@@ -60,6 +60,7 @@ void testGaussian() {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.setAsCurrent();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
HipArray& random = context.getIntegrationUtilities().getRandom(); HipArray& random = context.getIntegrationUtilities().getRandom();
context.getIntegrationUtilities().prepareRandomNumbers(random.getSize()); context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
......
...@@ -70,6 +70,7 @@ void verifySorting(vector<float> array, bool uniform) { ...@@ -70,6 +70,7 @@ void verifySorting(vector<float> array, bool uniform) {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL); platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0]; HipContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.setAsCurrent();
HipArray data(context, array.size(), 4, "sortData"); HipArray data(context, array.size(), 4, "sortData");
data.upload(array); data.upload(array);
HipSort sort(context, new SortTrait(), array.size(), uniform); HipSort sort(context, new SortTrait(), array.size(), uniform);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2020 Stanford University and the Authors. * * Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2021 Advanced Micro Devices, Inc. * * Portions copyright (c) 2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Mark Friedrichs * * Authors: Peter Eastman, Mark Friedrichs *
* Contributors: * * Contributors: *
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define _USE_MATH_DEFINES // Needed to get M_PI #define _USE_MATH_DEFINES // Needed to get M_PI
#endif #endif
#include "AmoebaHipKernels.h" #include "AmoebaHipKernels.h"
#include "openmm/common/ContextSelector.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h" #include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h" #include "openmm/internal/AmoebaMultipoleForceImpl.h"
...@@ -56,7 +57,7 @@ using namespace std; ...@@ -56,7 +57,7 @@ using namespace std;
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() { HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
cc.setAsCurrent(); ContextSelector selector(cc);
if (fft != NULL) if (fft != NULL)
delete fft; delete fft;
} }
...@@ -64,6 +65,7 @@ HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() { ...@@ -64,6 +65,7 @@ HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
void HipCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) { void HipCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) {
CommonCalcAmoebaMultipoleForceKernel::initialize(system, force); CommonCalcAmoebaMultipoleForceKernel::initialize(system, force);
if (usePME) { if (usePME) {
ContextSelector selector(cc);
HipArray& grid1 = cu.unwrap(pmeGrid1); HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2); HipArray& grid2 = cu.unwrap(pmeGrid2);
fft = cu.createFFT(gridSizeX, gridSizeY, gridSizeZ, false, cu.getCurrentStream(), grid1, grid2); fft = cu.createFFT(gridSizeX, gridSizeY, gridSizeZ, false, cu.getCurrentStream(), grid1, grid2);
...@@ -79,7 +81,7 @@ void HipCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) { ...@@ -79,7 +81,7 @@ void HipCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) {
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() { HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
cc.setAsCurrent(); ContextSelector selector(cc);
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -91,6 +93,7 @@ HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() { ...@@ -91,6 +93,7 @@ HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
void HipCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) { void HipCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) {
CommonCalcHippoNonbondedForceKernel::initialize(system, force); CommonCalcHippoNonbondedForceKernel::initialize(system, force);
if (usePME) { if (usePME) {
ContextSelector selector(cc);
sort = new HipSort(cu, new SortTrait(), cc.getNumAtoms()); sort = new HipSort(cu, new SortTrait(), cc.getNumAtoms());
HipArray& grid1 = cu.unwrap(pmeGrid1); HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2); HipArray& grid2 = cu.unwrap(pmeGrid2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment