Unverified Commit ecc2d258 authored by Anton Gorenko's avatar Anton Gorenko
Browse files

Port changes from the main repository

Use cuCtxPushCurrent() and cuCtxPopCurrent() for selecting CUDA context

    https://github.com/openmm/openmm/pull/3258

Fixed uninitialized memory access

    https://github.com/openmm/openmm/issues/3392
    https://github.com/openmm/openmm/pull/3399

Fixed potential invalid memory access

    See https://github.com/openmm/openmm/pull/3428

Improved temperature reporting for Drude particles

    https://github.com/openmm/openmm/pull/3486
    https://github.com/openmm/openmm/commit/a5e42f5

Fixed race condition with multiple GPUs

    https://github.com/openmm/openmm/commit/6fb1c8a41edff980862750bc086f6a204eb50941

Use blocking sync when creating events

    https://github.com/openmm/openmm/commit/fe21d5ee4f14673a4ea38b7244991772a64ceec2

Very minor optimizations

    https://github.com/openmm/openmm/commit/109f6b2535da4e0c0dd88007d6ca06b4add2ce81

Use PocketFFT

    https://github.com/openmm/openmm/commit/1dac981a63300a2a53a7925f570995914f7163ed

Improved logic for deciding when to reorder atoms

    https://github.com/openmm/openmm/commit/48664a1f1a4490a4dabc277757545ac070e7b898

Ensure valid atom order after loading a checkpoint

    https://github.com/openmm/openmm/commit/a056d5a3754e193105409afa12c9f0c9a2d972a2

Improve performance running on multiple GPUs

    https://github.com/openmm/openmm/commit/0c82c2647de98da5c6dab7bf7a7b8b19705aadc0

Fixed errors when running on multiple GPUs

    https://github.com/openmm/openmm/commit/ed9df876d43c037c08d4762721e73e5caae086d9

Optimized reducing energy

    https://github.com/openmm/openmm/commit/2975f44
parent f717ed89
......@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -56,7 +56,7 @@ public:
* @param name the name of the array
*/
template <class T>
static HipArray* create(HipContext& context, int size, const std::string& name) {
static HipArray* create(HipContext& context, size_t size, const std::string& name) {
return new HipArray(context, size, sizeof(T), name);
}
/**
......@@ -72,7 +72,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
HipArray(HipContext& context, int size, int elementSize, const std::string& name);
HipArray(HipContext& context, size_t size, int elementSize, const std::string& name);
~HipArray();
/**
* Initialize this object.
......@@ -82,7 +82,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name);
void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/**
* Initialize this object. The template argument is the data type of each array element.
*
......@@ -91,13 +91,13 @@ public:
* @param name the name of the array
*/
template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) {
void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name);
}
/**
* Recreate the internal storage to have a different size.
*/
void resize(int size);
void resize(size_t size);
/**
* Get whether this array has been initialized.
*/
......@@ -107,7 +107,7 @@ public:
/**
* Get the number of elements in the array.
*/
int getSize() const {
size_t getSize() const {
return size;
}
/**
......@@ -183,7 +183,8 @@ public:
private:
HipContext* context;
hipDeviceptr_t pointer;
int size, elementSize;
size_t size;
int elementSize;
bool ownsMemory;
std::string name;
};
......
......@@ -39,6 +39,7 @@
#include <map>
#include <stack>
#include <string>
#include <utility>
#define __CL_ENABLE_EXCEPTIONS
......@@ -99,10 +100,20 @@ public:
return contextIsValid;
}
/**
* Set the hipCtx_t associated with this object to be the current context. If the context is not
* Set the device associated with this object to be the current device. If the context is not
* valid, this returns without doing anything.
*/
void setAsCurrent();
/**
* Push the device associated with this object to be the current device. If the context is not
* valid, this returns without doing anything.
*/
void pushAsCurrent();
/**
* Pop the device associated with this object off the stack of contexts. If the context is not
* valid, this returns without doing anything.
*/
void popAsCurrent();
/**
* Get the hipDevice_t associated with this object.
*/
......@@ -582,6 +593,10 @@ public:
* expense of reduced simulation performance.
*/
void flushQueue();
/**
* Get the flags that should be used when creating hipEvent_t objects.
*/
unsigned int getEventFlags();
/**
* Get the flags that should be used when allocating pinned host memory.
*/
......@@ -610,6 +625,7 @@ private:
std::map<std::string, std::string> compilationDefines;
std::vector<hipModule_t> loadedModules;
hipDevice_t device;
std::stack<hipDevice_t> outerScopeDevices;
hipStream_t currentStream;
hipFunction_t clearBufferKernel;
hipFunction_t clearTwoBuffersKernel;
......
......@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2008-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -109,6 +109,18 @@ public:
* @param context the context in which to execute this kernel
*/
void setTime(ContextImpl& context, double time);
/**
* Get the current step count
*
* @param context the context in which to execute this kernel
*/
long long getStepCount(const ContextImpl& context) const;
/**
* Set the current step count
*
* @param context the context in which to execute this kernel
*/
void setStepCount(const ContextImpl& context, long long count);
/**
* Get the positions of all particles.
*
......@@ -133,6 +145,15 @@ public:
* @param velocities a vector containg the particle velocities
*/
void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities);
/**
* Compute velocities, shifted in time to account for a leapfrog integrator. The shift
* is based on the most recently computed forces.
*
* @param context the context in which to execute this kernel
* @param timeShift the amount by which to shift the velocities in time
* @param velocities the shifted velocities are returned in this
*/
void computeShiftedVelocities(ContextImpl& context, double timeShift, std::vector<Vec3>& velocities);
/**
* Get the current forces on all particles.
*
......
......@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -339,7 +339,7 @@ private:
HipArray rebuildNeighborList;
HipSort* blockSorter;
hipEvent_t downloadCountEvent;
int* pinnedCountBuffer;
unsigned int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
......@@ -349,8 +349,9 @@ private:
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, numTilesInBatch, maxExclusions;
int startTileIndex, startBlockIndex, numBlocks, numTilesInBatch, maxExclusions;
int numForceThreadBlocks, forceThreadBlockSize, findInteractingBlocksThreadBlockSize, numAtoms, groupFlags;
unsigned int maxTiles, maxSinglePairs, tilesAfterReorder;
long long numTiles;
std::string kernelSource;
};
......
......@@ -91,7 +91,9 @@ private:
long long* pinnedForceBuffer;
hipFunction_t sumKernel;
hipEvent_t event;
hipStream_t peerCopyStream;
std::vector<hipEvent_t> peerCopyEvent;
std::vector<hipEvent_t> peerCopyEventLocal;
std::vector<hipStream_t> peerCopyStream;
};
/**
......
......@@ -126,8 +126,8 @@ public:
std::vector<HipContext*> contexts;
std::vector<double> contextEnergy;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces;
int cmMotionFrequency;
int stepCount, computeForceCount;
int cmMotionFrequency, computeForceCount;
long long stepCount;
double time;
std::map<std::string, std::string> propertyValues;
ThreadPool threads;
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -27,6 +27,7 @@
#include "HipArray.h"
#include "HipContext.h"
#include "openmm/common/ContextSelector.h"
#include <iostream>
#include <sstream>
#include <vector>
......@@ -36,13 +37,13 @@ using namespace OpenMM;
HipArray::HipArray() : pointer(0), ownsMemory(false) {
}
HipArray::HipArray(HipContext& context, int size, int elementSize, const std::string& name) : pointer(0) {
HipArray::HipArray(HipContext& context, size_t size, int elementSize, const std::string& name) : pointer(0) {
initialize(context, size, elementSize, name);
}
HipArray::~HipArray() {
if (pointer != 0 && ownsMemory && context->getContextIsValid()) {
context->setAsCurrent();
ContextSelector selector(*context);
hipError_t result = hipFree(pointer);
if (result != hipSuccess) {
std::stringstream str;
......@@ -52,7 +53,7 @@ HipArray::~HipArray() {
}
}
void HipArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) {
void HipArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (this->pointer != 0)
throw OpenMMException("HipArray has already been initialized");
this->context = &dynamic_cast<HipContext&>(context);
......@@ -60,6 +61,7 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
this->elementSize = elementSize;
this->name = name;
ownsMemory = true;
ContextSelector selector(*this->context);
hipError_t result = hipMalloc(&pointer, size*elementSize);
if (result != hipSuccess) {
std::stringstream str;
......@@ -68,11 +70,12 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
}
}
void HipArray::resize(int size) {
void HipArray::resize(size_t size) {
if (pointer == 0)
throw OpenMMException("HipArray has not been initialized");
if (!ownsMemory)
throw OpenMMException("Cannot resize an array that does not own its storage");
ContextSelector selector(*context);
hipError_t result = hipFree(pointer);
if (result != hipSuccess) {
std::stringstream str;
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2023 Stanford University and the Authors. *
* Portions copyright (c) 2020-2023 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -39,6 +39,7 @@
#include "HipProgram.h"
#include "HipFFT3D.h"
#include "openmm/common/ComputeArray.h"
#include "openmm/common/ContextSelector.h"
#include "SHA1.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
......@@ -184,13 +185,15 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
}
contextIsValid = true;
ContextSelector selector(*this);
if (contextIndex > 0) {
int canAccess;
CHECK_RESULT(hipDeviceCanAccessPeer(&canAccess, getDevice(), platformData.contexts[0]->getDevice()));
if (canAccess) {
platformData.contexts[0]->setAsCurrent();
{
ContextSelector selector2(*platformData.contexts[0]);
CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0));
setAsCurrent();
}
CHECK_RESULT(hipDeviceEnablePeerAccess(platformData.contexts[0]->getDevice(), 0));
}
}
......@@ -345,7 +348,7 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
}
HipContext::~HipContext() {
setAsCurrent();
pushAsCurrent();
for (auto force : forces)
delete force;
for (auto listener : reorderListeners)
......@@ -366,28 +369,29 @@ HipContext::~HipContext() {
delete nonbonded;
for (auto module : loadedModules)
hipModuleUnload(module);
popAsCurrent();
contextIsValid = false;
}
void HipContext::initialize() {
hipSetDevice(device);
ContextSelector selector(*this);
string errorMessage = "Error initializing Context";
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum");
energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
}
else if (useMixedPrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum");
energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
}
else {
energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<float>(*this, 1, "energySum");
energySum.initialize<float>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags()));
}
......@@ -423,6 +427,29 @@ void HipContext::setAsCurrent() {
hipSetDevice(device);
}
void HipContext::pushAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPushCurrent's behavior
hipDevice_t outerScopeDevice;
hipGetDevice(&outerScopeDevice);
outerScopeDevices.push(outerScopeDevice);
if (device != outerScopeDevice) {
hipSetDevice(device);
}
}
}
void HipContext::popAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPopCurrent's behavior
hipDevice_t outerScopeDevice = outerScopeDevices.top();
outerScopeDevices.pop();
if (outerScopeDevice != device) {
hipSetDevice(outerScopeDevice);
}
}
}
string HipContext::getTempFileName() const {
stringstream tempFileName;
tempFileName << tempDir;
......@@ -784,12 +811,18 @@ double HipContext::reduceEnergy() {
int bufferSize = energyBuffer.getSize();
int workGroupSize = getMaxThreadBlockSize();
void* args[] = {&energyBuffer.getDevicePointer(), &energySum.getDevicePointer(), &bufferSize, &workGroupSize};
executeKernel(reduceEnergyKernel, args, workGroupSize, workGroupSize, workGroupSize*energyBuffer.getElementSize());
executeKernel(reduceEnergyKernel, args, workGroupSize*energySum.getSize(), workGroupSize, workGroupSize*energyBuffer.getElementSize());
energySum.download(pinnedBuffer);
if (getUseDoublePrecision() || getUseMixedPrecision())
return *((double*) pinnedBuffer);
else
return *((float*) pinnedBuffer);
double result = 0;
if (getUseDoublePrecision() || getUseMixedPrecision()) {
for (int i = 0; i < energySum.getSize(); i++)
result += ((double*) pinnedBuffer)[i];
}
else {
for (int i = 0; i < energySum.getSize(); i++)
result += ((float*) pinnedBuffer)[i];
}
return result;
}
void HipContext::setCharges(const vector<double>& charges) {
......@@ -850,6 +883,13 @@ vector<int> HipContext::getDevicePrecedence() {
return precedence;
}
unsigned int HipContext::getEventFlags() {
unsigned int flags = hipEventDisableTiming;
if (useBlockingSync)
flags += hipEventBlockingSync;
return flags;
}
unsigned int HipContext::getHostMallocFlags() {
#ifdef WIN32
return hipHostMallocDefault;
......
......@@ -31,7 +31,7 @@
using namespace OpenMM;
HipEvent::HipEvent(HipContext& context) : context(context), eventCreated(false) {
hipError_t result = hipEventCreateWithFlags(&event, hipEventDisableTiming);
hipError_t result = hipEventCreateWithFlags(&event, context.getEventFlags());
if (result != hipSuccess)
throw OpenMMException("Error creating HIP event:"+HipContext::getErrorString(result));
eventCreated = true;
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -27,6 +27,7 @@
#include "HipIntegrationUtilities.h"
#include "HipContext.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM;
using namespace std;
......@@ -41,13 +42,13 @@ using namespace std;
HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) {
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, hipEventDisableTiming), "Error creating event for CCMA");
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, context.getEventFlags()), "Error creating event for CCMA");
CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory");
CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
}
HipIntegrationUtilities::~HipIntegrationUtilities() {
context.setAsCurrent();
ContextSelector selector(context);
if (ccmaConvergedMemory != NULL) {
hipHostFree(ccmaConvergedMemory);
hipEventDestroy(ccmaEvent);
......@@ -67,6 +68,7 @@ HipArray& HipIntegrationUtilities::getStepSize() {
}
void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) {
ContextSelector selector(context);
ComputeKernel settleKernel, shakeKernel, ccmaForceKernel;
if (constrainVelocities) {
settleKernel = settleVelKernel;
......@@ -132,6 +134,7 @@ void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, dou
}
void HipIntegrationUtilities::distributeForcesFromVirtualSites() {
ContextSelector selector(context);
if (numVsites > 0) {
vsiteForceKernel->setArg(2, context.getLongForceBuffer());
vsiteForceKernel->execute(numVsites);
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2008-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -30,6 +30,7 @@
#include "openmm/Context.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/common/ContextSelector.h"
#include "CommonKernelSources.h"
#include "HipBondedUtilities.h"
#include "HipExpressionUtilities.h"
......@@ -59,7 +60,7 @@ void HipCalcForcesAndEnergyKernel::initialize(const System& system) {
void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups) {
cu.setForcesValid(true);
cu.setAsCurrent();
ContextSelector selector(cu);
cu.clearAutoclearBuffers();
for (auto computation : cu.getPreComputations())
computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
......@@ -72,7 +73,7 @@ void HipCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
}
double HipCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.setAsCurrent();
ContextSelector selector(cu);
cu.getBondedUtilities().computeInteractions(groups);
cu.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0;
......@@ -99,8 +100,18 @@ void HipUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
ctx->setTime(time);
}
long long HipUpdateStateDataKernel::getStepCount(const ContextImpl& context) const {
return cu.getStepCount();
}
void HipUpdateStateDataKernel::setStepCount(const ContextImpl& context, long long count) {
vector<HipContext*>& contexts = cu.getPlatformData().contexts;
for (auto ctx : contexts)
ctx->setStepCount(count);
}
void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
cu.setAsCurrent();
ContextSelector selector(cu);
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<float4> posCorrection;
......@@ -161,7 +172,7 @@ void HipUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>&
}
void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) {
cu.setAsCurrent();
ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision()) {
......@@ -212,7 +223,7 @@ void HipUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<V
}
void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>& velocities) {
cu.setAsCurrent();
ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
velocities.resize(numParticles);
......@@ -237,7 +248,7 @@ void HipUpdateStateDataKernel::getVelocities(ContextImpl& context, vector<Vec3>&
}
void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<Vec3>& velocities) {
cu.setAsCurrent();
ContextSelector selector(cu);
const vector<int>& order = cu.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
......@@ -270,8 +281,12 @@ void HipUpdateStateDataKernel::setVelocities(ContextImpl& context, const vector<
}
}
void HipUpdateStateDataKernel::computeShiftedVelocities(ContextImpl& context, double timeShift, vector<Vec3>& velocities) {
cu.getIntegrationUtilities().computeShiftedVelocities(timeShift, velocities);
}
void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& forces) {
cu.setAsCurrent();
ContextSelector selector(cu);
long long* force = (long long*) cu.getPinnedBuffer();
cu.getForce().download(force);
const vector<int>& order = cu.getAtomIndex();
......@@ -284,6 +299,7 @@ void HipUpdateStateDataKernel::getForces(ContextImpl& context, vector<Vec3>& for
}
void HipUpdateStateDataKernel::getEnergyParameterDerivatives(ContextImpl& context, map<string, double>& derivs) {
ContextSelector selector(cu);
const vector<string>& paramDerivNames = cu.getEnergyParamDerivNames();
int numDerivs = paramDerivNames.size();
if (numDerivs == 0)
......@@ -337,15 +353,15 @@ void HipUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const
}
void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
cu.setAsCurrent();
ContextSelector selector(cu);
int version = 3;
stream.write((char*) &version, sizeof(int));
int precision = (cu.getUseDoublePrecision() ? 2 : cu.getUseMixedPrecision() ? 1 : 0);
stream.write((char*) &precision, sizeof(int));
double time = cu.getTime();
stream.write((char*) &time, sizeof(double));
int stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(int));
long long stepCount = cu.getStepCount();
stream.write((char*) &stepCount, sizeof(long long));
int stepsSinceReorder = cu.getStepsSinceReorder();
stream.write((char*) &stepsSinceReorder, sizeof(int));
char* buffer = (char*) cu.getPinnedBuffer();
......@@ -367,7 +383,7 @@ void HipUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& s
}
void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
cu.setAsCurrent();
ContextSelector selector(cu);
int version;
stream.read((char*) &version, sizeof(int));
if (version != 3)
......@@ -379,8 +395,9 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str
throw OpenMMException("Checkpoint was created with a different numeric precision");
double time;
stream.read((char*) &time, sizeof(double));
int stepCount, stepsSinceReorder;
stream.read((char*) &stepCount, sizeof(int));
long long stepCount;
stream.read((char*) &stepCount, sizeof(long long));
int stepsSinceReorder;
stream.read((char*) &stepsSinceReorder, sizeof(int));
vector<HipContext*>& contexts = cu.getPlatformData().contexts;
for (auto ctx : contexts) {
......@@ -408,6 +425,7 @@ void HipUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& str
SimTKOpenMMUtilities::loadCheckpoint(stream);
for (auto listener : cu.getReorderListeners())
listener->execute();
cu.validateAtomOrder();
}
class HipCalcNonbondedForceKernel::ForceInfo : public HipForceInfo {
......@@ -448,7 +466,7 @@ public:
forceTemp.initialize<float4>(cu, cu.getNumAtoms(), "PmeForce");
}
float* getPosq() {
cu.setAsCurrent();
ContextSelector selector(cu);
cu.getPosq().download(posq);
return (float*) &posq[0];
}
......@@ -532,7 +550,7 @@ private:
};
HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() {
cu.setAsCurrent();
ContextSelector selector(cu);
if (sort != NULL)
delete sort;
if (fft != NULL)
......@@ -551,7 +569,7 @@ HipCalcNonbondedForceKernel::~HipCalcNonbondedForceKernel() {
}
void HipCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
cu.setAsCurrent();
ContextSelector selector(cu);
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
......@@ -650,8 +668,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
hasOffsets = (force.getNumParticleParameterOffsets() > 0 || force.getNumExceptionParameterOffsets() > 0);
if (hasOffsets)
paramsDefines["HAS_OFFSETS"] = "1";
if (force.getNumParticleParameterOffsets() > 0)
paramsDefines["HAS_PARTICLE_OFFSETS"] = "1";
if (force.getNumExceptionParameterOffsets() > 0)
paramsDefines["HAS_EXCEPTION_OFFSETS"] = "1";
if (usePosqCharges)
paramsDefines["USE_POSQ_CHARGES"] = "1";
if (doLJPME)
paramsDefines["INCLUDE_LJPME_EXCEPTIONS"] = "1";
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
......@@ -703,8 +727,16 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
if (doLJPME) {
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
}
if (cu.getContextIndex() == 0) {
paramsDefines["INCLUDE_EWALD"] = "1";
paramsDefines["EWALD_SELF_ENERGY_SCALE"] = cu.doubleToString(ONE_4PI_EPS0*alpha/sqrt(M_PI));
......@@ -766,13 +798,6 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1";
pmeDefines["CHARGE_FROM_SIGEPS"] = "1";
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
module = cu.createModule(HipKernelSources::vectorOps+CommonKernelSources::pme, pmeDefines);
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
......@@ -813,8 +838,8 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
if (usePmeStream) {
CHECK_RESULT(hipStreamCreateWithFlags(&pmeStream, hipStreamNonBlocking), "Error creating stream for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&pmeSyncEvent, hipEventDisableTiming), "Error creating event for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&paramsSyncEvent, hipEventDisableTiming), "Error creating event for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&pmeSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
CHECK_RESULT(hipEventCreateWithFlags(&paramsSyncEvent, cu.getEventFlags()), "Error creating event for NonbondedForce");
int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0)
recipForceGroup = force.getForceGroup();
......@@ -939,6 +964,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
if (force.getIncludeDirectSpace())
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
......@@ -959,7 +985,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
}
if (hasCoulomb)
if (hasCoulomb && !usePosqCharges)
cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
if (hasLJ) {
......@@ -968,6 +994,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
cu.getNonbondedUtilities().addParameter(HipNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
}
source = cu.replaceStrings(source, replacements);
if (force.getIncludeDirectSpace())
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
// Initialize the exceptions.
......@@ -993,13 +1020,14 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams.getDevicePointer(), "float4");
if (force.getIncludeDirectSpace())
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
// Initialize parameter offsets.
vector<vector<float4> > particleOffsetVec(force.getNumParticles());
vector<vector<float4> > exceptionOffsetVec(force.getNumExceptions());
vector<vector<float4> > exceptionOffsetVec(numExceptions);
for (int i = 0; i < force.getNumParticleParameterOffsets(); i++) {
string param;
int particle;
......@@ -1020,6 +1048,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
int index = exceptionIndex[exception];
if (index < startIndex || index >= endIndex)
continue;
auto paramPos = find(paramNames.begin(), paramNames.end(), param);
int paramIndex;
if (paramPos == paramNames.end()) {
......@@ -1028,13 +1059,11 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
}
else
paramIndex = paramPos-paramNames.begin();
exceptionOffsetVec[exceptionIndex[exception]].push_back(make_float4(charge, sigma, epsilon, paramIndex));
exceptionOffsetVec[index-startIndex].push_back(make_float4(charge, sigma, epsilon, paramIndex));
}
paramValues.resize(paramNames.size(), 0.0);
particleParamOffsets.initialize<float4>(cu, max(force.getNumParticleParameterOffsets(), 1), "particleParamOffsets");
exceptionParamOffsets.initialize<float4>(cu, max(force.getNumExceptionParameterOffsets(), 1), "exceptionParamOffsets");
particleOffsetIndices.initialize<int>(cu, cu.getPaddedNumAtoms()+1, "particleOffsetIndices");
exceptionOffsetIndices.initialize<int>(cu, force.getNumExceptions()+1, "exceptionOffsetIndices");
vector<int> particleOffsetIndicesVec, exceptionOffsetIndicesVec;
vector<float4> p, e;
for (int i = 0; i < particleOffsetVec.size(); i++) {
......@@ -1054,7 +1083,9 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
particleParamOffsets.upload(p);
particleOffsetIndices.upload(particleOffsetIndicesVec);
}
if (force.getNumExceptionParameterOffsets() > 0) {
exceptionParamOffsets.initialize<float4>(cu, max((int) e.size(), 1), "exceptionParamOffsets");
exceptionOffsetIndices.initialize<int>(cu, exceptionOffsetIndicesVec.size(), "exceptionOffsetIndices");
if (e.size() > 0) {
exceptionParamOffsets.upload(e);
exceptionOffsetIndices.upload(exceptionOffsetIndicesVec);
}
......@@ -1075,6 +1106,7 @@ void HipCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
// Update particle and exception parameters.
ContextSelector selector(cu);
bool paramChanged = false;
for (int i = 0; i < paramNames.size(); i++) {
double value = context.getParameter(paramNames[i]);
......@@ -1089,7 +1121,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (recomputeParams || hasOffsets) {
bool computeSelfEnergy = (includeEnergy && includeReciprocal);
int computeSelfEnergy = (includeEnergy && includeReciprocal);
int numAtoms = cu.getPaddedNumAtoms();
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
......@@ -1258,7 +1290,7 @@ double HipCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
// Make sure the new parameters are acceptable.
cu.setAsCurrent();
ContextSelector selector(cu);
if (force.getNumParticles() != cu.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
if (!hasCoulomb || !hasLJ) {
......@@ -1271,20 +1303,28 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
throw OpenMMException("updateParametersInContext: The nonbonded force kernel does not include Lennard-Jones interactions, because all epsilons were originally 0");
}
}
set<int> exceptionsWithOffsets;
for (int i = 0; i < force.getNumExceptionParameterOffsets(); i++) {
string param;
int exception;
double charge, sigma, epsilon;
force.getExceptionParameterOffset(i, param, exception, charge, sigma, epsilon);
exceptionsWithOffsets.insert(exception);
}
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()])
if (chargeProd != 0.0 || epsilon != 0.0 || exceptionsWithOffsets.find(i) != exceptionsWithOffsets.end())
exceptions.push_back(i);
else if (chargeProd != 0.0 || epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cu.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
if (numExceptions != exceptionAtoms.size())
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
// Record the per-particle parameters.
......@@ -1300,11 +1340,13 @@ void HipCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the exceptions.
if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<float4> baseExceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], chargeProd, sigma, epsilon);
force.getExceptionParameters(exceptions[startIndex+i], particle1, particle2, chargeProd, sigma, epsilon);
if (make_pair(particle1, particle2) != exceptionAtoms[i])
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
baseExceptionParamsVec[i] = make_float4(chargeProd, sigma, epsilon, 0);
}
baseExceptionParams.upload(baseExceptionParamsVec);
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -439,6 +439,10 @@ void HipNonbondedUtilities::computeInteractions(int forceGroups, bool includeFor
bool HipNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return false;
if (context.getStepsSinceReorder() == 0)
tilesAfterReorder = pinnedCountBuffer[0];
else if (context.getStepsSinceReorder() > 25 && pinnedCountBuffer[0] > 1.1*tilesAfterReorder)
context.forceReorder();
if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs)
return false;
......@@ -446,12 +450,13 @@ bool HipNonbondedUtilities::updateNeighborListSize() {
// this from happening in the future.
if (pinnedCountBuffer[0] > maxTiles) {
maxTiles = (int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
maxTiles = (unsigned int) (1.2*pinnedCountBuffer[0]);
unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles)
maxTiles = totalTiles;
interactingTiles.resize(maxTiles);
interactingAtoms.resize(HipContext::TileSize*maxTiles);
interactingAtoms.resize(HipContext::TileSize*(size_t) maxTiles);
if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles.getDevicePointer();
findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer();
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2011-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -27,6 +27,7 @@
#include "HipParallelKernels.h"
#include "HipKernelSources.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM;
using namespace std;
......@@ -70,7 +71,7 @@ public:
void execute() {
// Copy coordinates over to this device and execute the kernel.
cu.setAsCurrent();
ContextSelector selector(cu);
if (cu.getContextIndex() > 0) {
hipStreamWaitEvent(cu.getCurrentStream(), event, 0);
if (!cu.getPlatformData().peerAccessSupported)
......@@ -94,13 +95,16 @@ private:
class HipParallelCalcForcesAndEnergyKernel::FinishComputationTask : public HipContext::WorkTask {
public:
FinishComputationTask(ContextImpl& context, HipContext& cu, HipCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces, bool& valid, int2& interactionCount) :
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces,
bool& valid, int2& interactionCount, hipStream_t stream, hipEvent_t event, hipEvent_t localEvent) :
context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount) {
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount),
stream(stream), event(event), localEvent(localEvent) {
}
void execute() {
// Execute the kernel, then download forces.
ContextSelector selector(cu);
energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid);
if (cu.getComputeForceCount() < 200) {
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
......@@ -110,13 +114,16 @@ public:
}
if (includeForce) {
if (cu.getContextIndex() > 0) {
hipEventRecord(localEvent, cu.getCurrentStream());
hipStreamWaitEvent(stream, localEvent, 0);
int numAtoms = cu.getPaddedNumAtoms();
if (cu.getPlatformData().peerAccessSupported) {
int numBytes = numAtoms*3*sizeof(long long);
int offset = (cu.getContextIndex()-1)*numBytes;
HipContext& context0 = *cu.getPlatformData().contexts[0];
CHECK_RESULT(hipMemcpy(static_cast<char*>(contextForces.getDevicePointer())+offset,
cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice), "Error copying forces");
CHECK_RESULT(hipMemcpyAsync(static_cast<char*>(contextForces.getDevicePointer())+offset,
cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice, stream), "Error copying forces");
hipEventRecord(event, stream);
}
else
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
......@@ -140,6 +147,9 @@ private:
HipArray& contextForces;
bool& valid;
int2& interactionCount;
hipStream_t stream;
hipEvent_t event;
hipEvent_t localEvent;
};
HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, HipPlatform::PlatformData& data) :
......@@ -150,20 +160,25 @@ HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(strin
}
HipParallelCalcForcesAndEnergyKernel::~HipParallelCalcForcesAndEnergyKernel() {
data.contexts[0]->setAsCurrent();
ContextSelector selector(*data.contexts[0]);
if (pinnedPositionBuffer != NULL)
hipHostFree(pinnedPositionBuffer);
if (pinnedForceBuffer != NULL)
hipHostFree(pinnedForceBuffer);
hipEventDestroy(event);
hipStreamDestroy(peerCopyStream);
for (int i = 0; i < peerCopyEvent.size(); i++)
hipEventDestroy(peerCopyEvent[i]);
for (int i = 0; i < peerCopyEventLocal.size(); i++)
hipEventDestroy(peerCopyEventLocal[i]);
for (int i = 0; i < peerCopyStream.size(); i++)
hipStreamDestroy(peerCopyStream[i]);
if (interactionCounts != NULL)
hipHostFree(interactionCounts);
}
void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
HipContext& cu = *data.contexts[0];
cu.setAsCurrent();
ContextSelector selector(cu);
hipModule_t module = cu.createModule(HipKernelSources::parallel);
sumKernel = cu.getKernel(module, "sumForces");
int numContexts = data.contexts.size();
......@@ -171,14 +186,25 @@ void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
getKernel(i).initialize(system);
for (int i = 0; i < numContexts; i++)
contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(hipEventCreateWithFlags(&event, 0), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream, hipStreamNonBlocking), "Error creating stream");
CHECK_RESULT(hipEventCreateWithFlags(&event, cu.getEventFlags()), "Error creating event");
peerCopyEvent.resize(numContexts);
peerCopyEventLocal.resize(numContexts);
peerCopyStream.resize(numContexts);
for (int i = 0; i < numContexts; i++) {
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEvent[i], cu.getEventFlags()), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream[i], hipStreamNonBlocking), "Error creating stream");
}
for (int i = 0; i < numContexts; i++) {
HipContext& cuLocal = *data.contexts[i];
ContextSelector selectorLocal(cuLocal);
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEventLocal[i], cu.getEventFlags()), "Error creating event");
}
CHECK_RESULT(hipHostMalloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer");
}
void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
HipContext& cu = *data.contexts[0];
cu.setAsCurrent();
ContextSelector selector(cu);
if (!contextForces.isInitialized()) {
contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
CHECK_RESULT(hipHostMalloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), hipHostMallocPortable), "Error allocating pinned memory");
......@@ -194,36 +220,44 @@ void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context
else {
int numBytes = cu.getPosq().getSize()*cu.getPosq().getElementSize();
hipEventRecord(event, cu.getCurrentStream());
hipStreamWaitEvent(peerCopyStream, event, 0);
for (int i = 1; i < (int) data.contexts.size(); i++)
for (int i = 1; i < (int) data.contexts.size(); i++) {
hipStreamWaitEvent(peerCopyStream[i], event, 0);
CHECK_RESULT(hipMemcpyAsync(
data.contexts[i]->getPosq().getDevicePointer(),
cu.getPosq().getDevicePointer(), numBytes,
hipMemcpyDeviceToDevice, peerCopyStream), "Error copying positions");
hipEventRecord(event, peerCopyStream);
hipMemcpyDeviceToDevice, peerCopyStream[i]), "Error copying positions");
hipEventRecord(peerCopyEvent[i], peerCopyStream[i]);
}
}
for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0;
HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event, interactionCounts[i]));
hipEvent_t waitEvent = (cu.getPlatformData().peerAccessSupported ? peerCopyEvent[i] : event);
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, waitEvent, interactionCounts[i]));
}
data.syncContexts();
}
double HipParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, contextForces, valid, interactionCounts[i]));
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i],
pinnedForceBuffer, contextForces, valid, interactionCounts[i], peerCopyStream[i], peerCopyEvent[i], peerCopyEventLocal[i]));
}
data.syncContexts();
HipContext& cu = *data.contexts[0];
ContextSelector selector(cu);
if (cu.getPlatformData().peerAccessSupported)
for (int i = 1; i < data.contexts.size(); i++)
hipStreamWaitEvent(cu.getCurrentStream(), peerCopyEvent[i], 0);
double energy = 0.0;
for (int i = 0; i < (int) data.contextEnergy.size(); i++)
energy += data.contextEnergy[i];
if (includeForce && valid) {
// Sum the forces from all devices.
HipContext& cu = *data.contexts[0];
if (!cu.getPlatformData().peerAccessSupported)
contextForces.upload(pinnedForceBuffer, false);
int bufferSize = 3*cu.getPaddedNumAtoms();
......
......@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x;
mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x)
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x)
sum += energyBuffer[index];
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
......@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
tempBuffer[thread] += tempBuffer[thread+i];
}
if (thread == 0)
*result = tempBuffer[0];
result[blockIdx.x] = tempBuffer[0];
}
/**
......
......@@ -41,4 +41,6 @@ OpenMM::HipPlatform platform;
void initializeTests(int argc, char* argv[]) {
if (argc > 1)
platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
if (argc > 2)
platform.setPropertyDefaultValue("DeviceIndex", std::string(argv[2]));
}
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -38,12 +38,16 @@
#include "HipArray.h"
#include "HipContext.h"
#include "HipFFT3D.h"
#include "fftpack.h"
#include "sfmt/SFMT.h"
#include "openmm/System.h"
#include <complex>
#include <iostream>
#include <cmath>
#include <set>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
using namespace OpenMM;
using namespace std;
......@@ -67,19 +71,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Real2> original(xsize*ysize*zsize);
vector<t_complex> reference(original.size());
vector<complex<double>> reference(original.size());
for (int i = 0; i < (int) original.size(); i++) {
Real2 value;
value.x = (float) genrand_real2(sfmt);
value.y = (float) genrand_real2(sfmt);
original[i] = value;
reference[i] = t_complex(value.x, value.y);
reference[i] = complex<double>(value.x, value.y);
}
for (int i = 0; i < (int) reference.size(); i++) {
if (realToComplex)
reference[i] = t_complex(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
reference[i] = complex<double>(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
else
reference[i] = t_complex(original[i].x, original[i].y);
reference[i] = complex<double>(original[i].x, original[i].y);
}
HipArray grid1(context, original.size(), sizeof(Real2), "grid1");
HipArray grid2(context, original.size(), sizeof(Real2), "grid2");
......@@ -91,19 +95,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
fft.execFFT(true);
vector<Real2> result;
grid2.download(result);
fftpack_t plan;
fftpack_init_3d(&plan, xsize, ysize, zsize);
fftpack_exec_3d(plan, FFTPACK_FORWARD, &reference[0], &reference[0]);
vector<size_t> shape = {(size_t) xsize, (size_t) ysize, (size_t) zsize};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (ysize*zsize*sizeof(complex<double>)),
(ptrdiff_t) (zsize*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, reference.data(), reference.data(), 1.0);
int outputZSize = (realToComplex ? zsize/2+1 : zsize);
for (int x = 0; x < xsize; x++)
for (int y = 0; y < ysize; y++)
for (int z = 0; z < outputZSize; z++) {
int index1 = x*ysize*zsize + y*zsize + z;
int index2 = x*ysize*outputZSize + y*outputZSize + z;
ASSERT_EQUAL_TOL(reference[index1].re, result[index2].x, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].im, result[index2].y, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].real(), result[index2].x, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].imag(), result[index2].y, 1e-3 * eps);
}
fftpack_destroy(plan);
// Perform a backward transform and see if we get the original values.
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
......@@ -50,12 +50,18 @@ void testParallelComputation(NonbondedForce::NonbondedMethod method) {
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt));
force->addGlobalParameter("scale", 0.5);
for (int i = 0; i < numParticles; ++i)
for (int j = 0; j < i; ++j) {
Vec3 delta = positions[i]-positions[j];
if (delta.dot(delta) < 0.1)
if (delta.dot(delta) < 0.1) {
force->addException(i, j, 0, 1, 0);
}
else if (delta.dot(delta) < 0.2) {
int index = force->addException(i, j, 0.5, 1, 1.0);
force->addExceptionParameterOffset("scale", index, 0.5, 0.4, 0.3);
}
}
// Create two contexts, one with a single device and one with two devices.
......@@ -179,6 +185,7 @@ void runPlatformTests() {
testParallelComputation(NonbondedForce::NoCutoff);
testParallelComputation(NonbondedForce::Ewald);
testParallelComputation(NonbondedForce::PME);
testParallelComputation(NonbondedForce::LJPME);
testReordering();
testDeterministicForces();
if (canRunHugeTest())
......
......@@ -60,6 +60,7 @@ void testGaussian() {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0];
context.initialize();
context.setAsCurrent();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
HipArray& random = context.getIntegrationUtilities().getRandom();
context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
......
......@@ -70,6 +70,7 @@ void verifySorting(vector<float> array, bool uniform) {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0];
context.initialize();
context.setAsCurrent();
HipArray data(context, array.size(), 4, "sortData");
data.upload(array);
HipSort sort(context, new SortTrait(), array.size(), uniform);
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2020 Stanford University and the Authors. *
* Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Mark Friedrichs *
* Contributors: *
......@@ -29,6 +29,7 @@
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "AmoebaHipKernels.h"
#include "openmm/common/ContextSelector.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h"
......@@ -56,7 +57,7 @@ using namespace std;
* -------------------------------------------------------------------------- */
HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
cc.setAsCurrent();
ContextSelector selector(cc);
if (fft != NULL)
delete fft;
}
......@@ -64,6 +65,7 @@ HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
void HipCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) {
CommonCalcAmoebaMultipoleForceKernel::initialize(system, force);
if (usePME) {
ContextSelector selector(cc);
HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2);
fft = cu.createFFT(gridSizeX, gridSizeY, gridSizeZ, false, cu.getCurrentStream(), grid1, grid2);
......@@ -79,7 +81,7 @@ void HipCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) {
* -------------------------------------------------------------------------- */
HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
cc.setAsCurrent();
ContextSelector selector(cc);
if (sort != NULL)
delete sort;
if (fft != NULL)
......@@ -91,6 +93,7 @@ HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
void HipCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) {
CommonCalcHippoNonbondedForceKernel::initialize(system, force);
if (usePME) {
ContextSelector selector(cc);
sort = new HipSort(cu, new SortTrait(), cc.getNumAtoms());
HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment