Unverified Commit ecc2d258 authored by Anton Gorenko
Browse files

Port changes from the main repository

Use cuCtxPushCurrent() and cuCtxPopCurrent() for selecting CUDA context

    https://github.com/openmm/openmm/pull/3258

Fixed uninitialized memory access

    https://github.com/openmm/openmm/issues/3392
    https://github.com/openmm/openmm/pull/3399

Fixed potential invalid memory access

    See https://github.com/openmm/openmm/pull/3428

Improved temperature reporting for Drude particles

    https://github.com/openmm/openmm/pull/3486
    https://github.com/openmm/openmm/commit/a5e42f5

Fixed race condition with multiple GPUs

    https://github.com/openmm/openmm/commit/6fb1c8a41edff980862750bc086f6a204eb50941

Use blocking sync when creating events

    https://github.com/openmm/openmm/commit/fe21d5ee4f14673a4ea38b7244991772a64ceec2

Very minor optimizations

    https://github.com/openmm/openmm/commit/109f6b2535da4e0c0dd88007d6ca06b4add2ce81

Use PocketFFT

    https://github.com/openmm/openmm/commit/1dac981a63300a2a53a7925f570995914f7163ed

Improved logic for deciding when to reorder atoms

    https://github.com/openmm/openmm/commit/48664a1f1a4490a4dabc277757545ac070e7b898

Ensure valid atom order after loading a checkpoint

    https://github.com/openmm/openmm/commit/a056d5a3754e193105409afa12c9f0c9a2d972a2

Improve performance running on multiple GPUs

    https://github.com/openmm/openmm/commit/0c82c2647de98da5c6dab7bf7a7b8b19705aadc0

Fixed errors when running on multiple GPUs

    https://github.com/openmm/openmm/commit/ed9df876d43c037c08d4762721e73e5caae086d9

Optimized reducing energy

    https://github.com/openmm/openmm/commit/2975f44
parent f717ed89
......@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -56,7 +56,7 @@ public:
* @param name the name of the array
*/
template <class T>
static HipArray* create(HipContext& context, int size, const std::string& name) {
static HipArray* create(HipContext& context, size_t size, const std::string& name) {
return new HipArray(context, size, sizeof(T), name);
}
/**
......@@ -72,7 +72,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
HipArray(HipContext& context, int size, int elementSize, const std::string& name);
HipArray(HipContext& context, size_t size, int elementSize, const std::string& name);
~HipArray();
/**
* Initialize this object.
......@@ -82,7 +82,7 @@ public:
* @param elementSize the size of each element in bytes
* @param name the name of the array
*/
void initialize(ComputeContext& context, int size, int elementSize, const std::string& name);
void initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name);
/**
* Initialize this object. The template argument is the data type of each array element.
*
......@@ -91,13 +91,13 @@ public:
* @param name the name of the array
*/
template <class T>
void initialize(ComputeContext& context, int size, const std::string& name) {
void initialize(ComputeContext& context, size_t size, const std::string& name) {
initialize(context, size, sizeof(T), name);
}
/**
* Recreate the internal storage to have a different size.
*/
void resize(int size);
void resize(size_t size);
/**
* Get whether this array has been initialized.
*/
......@@ -107,7 +107,7 @@ public:
/**
* Get the number of elements in the array.
*/
int getSize() const {
size_t getSize() const {
return size;
}
/**
......@@ -183,7 +183,8 @@ public:
private:
HipContext* context;
hipDeviceptr_t pointer;
int size, elementSize;
size_t size;
int elementSize;
bool ownsMemory;
std::string name;
};
......
......@@ -39,6 +39,7 @@
#include <map>
#include <stack>
#include <string>
#include <utility>
#define __CL_ENABLE_EXCEPTIONS
......@@ -99,10 +100,20 @@ public:
return contextIsValid;
}
/**
* Set the hipCtx_t associated with this object to be the current context. If the context is not
* Set the device associated with this object to be the current device. If the context is not
* valid, this returns without doing anything.
*/
void setAsCurrent();
/**
* Make the device associated with this object the current device, remembering the
* previously current device so that popAsCurrent() can restore it. If the context is not
* valid, this returns without doing anything.
*/
void pushAsCurrent();
/**
* Restore the device that was current before the matching call to pushAsCurrent(),
* removing it from this object's stack of saved devices. If the context is not
* valid, this returns without doing anything.
*/
void popAsCurrent();
/**
* Get the hipDevice_t associated with this object.
*/
......@@ -582,6 +593,10 @@ public:
* expense of reduced simulation performance.
*/
void flushQueue();
/**
* Get the flags that should be used when creating hipEvent_t objects.
*/
unsigned int getEventFlags();
/**
* Get the flags that should be used when allocating pinned host memory.
*/
......@@ -610,6 +625,7 @@ private:
std::map<std::string, std::string> compilationDefines;
std::vector<hipModule_t> loadedModules;
hipDevice_t device;
std::stack<hipDevice_t> outerScopeDevices;
hipStream_t currentStream;
hipFunction_t clearBufferKernel;
hipFunction_t clearTwoBuffersKernel;
......
......@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2008-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -109,6 +109,18 @@ public:
* @param context the context in which to execute this kernel
*/
void setTime(ContextImpl& context, double time);
/**
* Get the current step count
*
* @param context the context in which to execute this kernel
* @return the current step count
*/
long long getStepCount(const ContextImpl& context) const;
/**
* Set the current step count
*
* @param context the context in which to execute this kernel
* @param count the new value of the step count
*/
void setStepCount(const ContextImpl& context, long long count);
/**
* Get the positions of all particles.
*
......@@ -133,6 +145,15 @@ public:
* @param velocities a vector containing the particle velocities
*/
void setVelocities(ContextImpl& context, const std::vector<Vec3>& velocities);
/**
* Compute velocities, shifted in time to account for a leapfrog integrator. The shift
* is based on the most recently computed forces.
*
* @param context the context in which to execute this kernel
* @param timeShift the amount by which to shift the velocities in time
* @param velocities the shifted velocities are returned in this
*/
void computeShiftedVelocities(ContextImpl& context, double timeShift, std::vector<Vec3>& velocities);
/**
* Get the current forces on all particles.
*
......
......@@ -9,8 +9,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -339,7 +339,7 @@ private:
HipArray rebuildNeighborList;
HipSort* blockSorter;
hipEvent_t downloadCountEvent;
int* pinnedCountBuffer;
unsigned int* pinnedCountBuffer;
std::vector<void*> forceArgs, findBlockBoundsArgs, sortBoxDataArgs, findInteractingBlocksArgs;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
......@@ -349,8 +349,9 @@ private:
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, numTilesInBatch, maxExclusions;
int startTileIndex, startBlockIndex, numBlocks, numTilesInBatch, maxExclusions;
int numForceThreadBlocks, forceThreadBlockSize, findInteractingBlocksThreadBlockSize, numAtoms, groupFlags;
unsigned int maxTiles, maxSinglePairs, tilesAfterReorder;
long long numTiles;
std::string kernelSource;
};
......
......@@ -91,7 +91,9 @@ private:
long long* pinnedForceBuffer;
hipFunction_t sumKernel;
hipEvent_t event;
hipStream_t peerCopyStream;
std::vector<hipEvent_t> peerCopyEvent;
std::vector<hipEvent_t> peerCopyEventLocal;
std::vector<hipStream_t> peerCopyStream;
};
/**
......
......@@ -126,8 +126,8 @@ public:
std::vector<HipContext*> contexts;
std::vector<double> contextEnergy;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces;
int cmMotionFrequency;
int stepCount, computeForceCount;
int cmMotionFrequency, computeForceCount;
long long stepCount;
double time;
std::map<std::string, std::string> propertyValues;
ThreadPool threads;
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2012-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -27,6 +27,7 @@
#include "HipArray.h"
#include "HipContext.h"
#include "openmm/common/ContextSelector.h"
#include <iostream>
#include <sstream>
#include <vector>
......@@ -36,13 +37,13 @@ using namespace OpenMM;
/**
 * Create an uninitialized HipArray. No device memory is allocated; initialize()
 * must be called before the array is used.
 *
 * Every scalar member is given a defined value so that accessors such as
 * getSize() do not read indeterminate memory on an array that has not been
 * initialized yet.
 */
HipArray::HipArray() : context(nullptr), pointer(0), size(0), elementSize(0), ownsMemory(false) {
}
HipArray::HipArray(HipContext& context, int size, int elementSize, const std::string& name) : pointer(0) {
HipArray::HipArray(HipContext& context, size_t size, int elementSize, const std::string& name) : pointer(0) {
initialize(context, size, elementSize, name);
}
HipArray::~HipArray() {
if (pointer != 0 && ownsMemory && context->getContextIsValid()) {
context->setAsCurrent();
ContextSelector selector(*context);
hipError_t result = hipFree(pointer);
if (result != hipSuccess) {
std::stringstream str;
......@@ -52,7 +53,7 @@ HipArray::~HipArray() {
}
}
void HipArray::initialize(ComputeContext& context, int size, int elementSize, const std::string& name) {
void HipArray::initialize(ComputeContext& context, size_t size, int elementSize, const std::string& name) {
if (this->pointer != 0)
throw OpenMMException("HipArray has already been initialized");
this->context = &dynamic_cast<HipContext&>(context);
......@@ -60,6 +61,7 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
this->elementSize = elementSize;
this->name = name;
ownsMemory = true;
ContextSelector selector(*this->context);
hipError_t result = hipMalloc(&pointer, size*elementSize);
if (result != hipSuccess) {
std::stringstream str;
......@@ -68,11 +70,12 @@ void HipArray::initialize(ComputeContext& context, int size, int elementSize, co
}
}
void HipArray::resize(int size) {
void HipArray::resize(size_t size) {
if (pointer == 0)
throw OpenMMException("HipArray has not been initialized");
if (!ownsMemory)
throw OpenMMException("Cannot resize an array that does not own its storage");
ContextSelector selector(*context);
hipError_t result = hipFree(pointer);
if (result != hipSuccess) {
std::stringstream str;
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2023 Stanford University and the Authors. *
* Portions copyright (c) 2020-2023 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -39,6 +39,7 @@
#include "HipProgram.h"
#include "HipFFT3D.h"
#include "openmm/common/ComputeArray.h"
#include "openmm/common/ContextSelector.h"
#include "SHA1.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
......@@ -184,13 +185,15 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
}
contextIsValid = true;
ContextSelector selector(*this);
if (contextIndex > 0) {
int canAccess;
CHECK_RESULT(hipDeviceCanAccessPeer(&canAccess, getDevice(), platformData.contexts[0]->getDevice()));
if (canAccess) {
platformData.contexts[0]->setAsCurrent();
CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0));
setAsCurrent();
{
ContextSelector selector2(*platformData.contexts[0]);
CHECK_RESULT(hipDeviceEnablePeerAccess(getDevice(), 0));
}
CHECK_RESULT(hipDeviceEnablePeerAccess(platformData.contexts[0]->getDevice(), 0));
}
}
......@@ -345,7 +348,7 @@ HipContext::HipContext(const System& system, int deviceIndex, bool useBlockingSy
}
HipContext::~HipContext() {
setAsCurrent();
pushAsCurrent();
for (auto force : forces)
delete force;
for (auto listener : reorderListeners)
......@@ -366,28 +369,29 @@ HipContext::~HipContext() {
delete nonbonded;
for (auto module : loadedModules)
hipModuleUnload(module);
popAsCurrent();
contextIsValid = false;
}
void HipContext::initialize() {
hipSetDevice(device);
ContextSelector selector(*this);
string errorMessage = "Error initializing Context";
int numEnergyBuffers = max(numThreadBlocks*ThreadBlockSize, nonbonded->getNumEnergyBuffers());
if (useDoublePrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum");
energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
}
else if (useMixedPrecision) {
energyBuffer.initialize<double>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<double>(*this, 1, "energySum");
energySum.initialize<double>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*4, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(double), getHostMallocFlags()));
}
else {
energyBuffer.initialize<float>(*this, numEnergyBuffers, "energyBuffer");
energySum.initialize<float>(*this, 1, "energySum");
energySum.initialize<float>(*this, multiprocessors, "energySum");
int pinnedBufferSize = max(paddedNumAtoms*6, numEnergyBuffers);
CHECK_RESULT(hipHostMalloc(&pinnedBuffer, pinnedBufferSize*sizeof(float), getHostMallocFlags()));
}
......@@ -423,6 +427,29 @@ void HipContext::setAsCurrent() {
hipSetDevice(device);
}
void HipContext::pushAsCurrent() {
if (contextIsValid) {
// Emulate cuCtxPushCurrent's behavior
hipDevice_t outerScopeDevice;
hipGetDevice(&outerScopeDevice);
outerScopeDevices.push(outerScopeDevice);
if (device != outerScopeDevice) {
hipSetDevice(device);
}
}
}
/**
 * Restore the device that was current before the matching pushAsCurrent() call.
 * Does nothing if the context is not valid, or if there is no saved device to
 * restore (for example because the matching push was skipped while the context
 * was invalid).
 */
void HipContext::popAsCurrent() {
    // Calling top()/pop() on an empty std::stack is undefined behaviour, so guard against it.
    if (!contextIsValid || outerScopeDevices.empty())
        return;
    // Emulate cuCtxPopCurrent's behavior
    hipDevice_t outerScopeDevice = outerScopeDevices.top();
    outerScopeDevices.pop();
    if (outerScopeDevice != device) {
        hipSetDevice(outerScopeDevice);
    }
}
string HipContext::getTempFileName() const {
stringstream tempFileName;
tempFileName << tempDir;
......@@ -784,12 +811,18 @@ double HipContext::reduceEnergy() {
int bufferSize = energyBuffer.getSize();
int workGroupSize = getMaxThreadBlockSize();
void* args[] = {&energyBuffer.getDevicePointer(), &energySum.getDevicePointer(), &bufferSize, &workGroupSize};
executeKernel(reduceEnergyKernel, args, workGroupSize, workGroupSize, workGroupSize*energyBuffer.getElementSize());
executeKernel(reduceEnergyKernel, args, workGroupSize*energySum.getSize(), workGroupSize, workGroupSize*energyBuffer.getElementSize());
energySum.download(pinnedBuffer);
if (getUseDoublePrecision() || getUseMixedPrecision())
return *((double*) pinnedBuffer);
else
return *((float*) pinnedBuffer);
double result = 0;
if (getUseDoublePrecision() || getUseMixedPrecision()) {
for (int i = 0; i < energySum.getSize(); i++)
result += ((double*) pinnedBuffer)[i];
}
else {
for (int i = 0; i < energySum.getSize(); i++)
result += ((float*) pinnedBuffer)[i];
}
return result;
}
void HipContext::setCharges(const vector<double>& charges) {
......@@ -850,6 +883,13 @@ vector<int> HipContext::getDevicePrecedence() {
return precedence;
}
/**
 * Get the flags to pass when creating hipEvent_t objects.  The result always
 * contains hipEventDisableTiming, and additionally hipEventBlockingSync when
 * this context was configured with useBlockingSync.
 */
unsigned int HipContext::getEventFlags() {
    if (useBlockingSync)
        return hipEventDisableTiming | hipEventBlockingSync;
    return hipEventDisableTiming;
}
unsigned int HipContext::getHostMallocFlags() {
#ifdef WIN32
return hipHostMallocDefault;
......
......@@ -31,7 +31,7 @@
using namespace OpenMM;
HipEvent::HipEvent(HipContext& context) : context(context), eventCreated(false) {
hipError_t result = hipEventCreateWithFlags(&event, hipEventDisableTiming);
hipError_t result = hipEventCreateWithFlags(&event, context.getEventFlags());
if (result != hipSuccess)
throw OpenMMException("Error creating HIP event:"+HipContext::getErrorString(result));
eventCreated = true;
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -27,6 +27,7 @@
#include "HipIntegrationUtilities.h"
#include "HipContext.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM;
using namespace std;
......@@ -41,13 +42,13 @@ using namespace std;
HipIntegrationUtilities::HipIntegrationUtilities(HipContext& context, const System& system) : IntegrationUtilities(context, system),
ccmaConvergedMemory(NULL) {
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, hipEventDisableTiming), "Error creating event for CCMA");
CHECK_RESULT2(hipEventCreateWithFlags(&ccmaEvent, context.getEventFlags()), "Error creating event for CCMA");
CHECK_RESULT2(hipHostMalloc((void**) &ccmaConvergedMemory, sizeof(int), context.getHostMallocFlags()), "Error allocating pinned memory");
CHECK_RESULT2(hipHostGetDevicePointer(&ccmaConvergedDeviceMemory, ccmaConvergedMemory, 0), "Error getting device address for pinned memory");
}
HipIntegrationUtilities::~HipIntegrationUtilities() {
context.setAsCurrent();
ContextSelector selector(context);
if (ccmaConvergedMemory != NULL) {
hipHostFree(ccmaConvergedMemory);
hipEventDestroy(ccmaEvent);
......@@ -67,6 +68,7 @@ HipArray& HipIntegrationUtilities::getStepSize() {
}
void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, double tol) {
ContextSelector selector(context);
ComputeKernel settleKernel, shakeKernel, ccmaForceKernel;
if (constrainVelocities) {
settleKernel = settleVelKernel;
......@@ -132,6 +134,7 @@ void HipIntegrationUtilities::applyConstraintsImpl(bool constrainVelocities, dou
}
void HipIntegrationUtilities::distributeForcesFromVirtualSites() {
ContextSelector selector(context);
if (numVsites > 0) {
vsiteForceKernel->setArg(2, context.getLongForceBuffer());
vsiteForceKernel->execute(numVsites);
......
This diff is collapsed.
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2018 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2009-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -439,6 +439,10 @@ void HipNonbondedUtilities::computeInteractions(int forceGroups, bool includeFor
bool HipNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return false;
if (context.getStepsSinceReorder() == 0)
tilesAfterReorder = pinnedCountBuffer[0];
else if (context.getStepsSinceReorder() > 25 && pinnedCountBuffer[0] > 1.1*tilesAfterReorder)
context.forceReorder();
if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs)
return false;
......@@ -446,12 +450,13 @@ bool HipNonbondedUtilities::updateNeighborListSize() {
// this from happening in the future.
if (pinnedCountBuffer[0] > maxTiles) {
maxTiles = (int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
maxTiles = (unsigned int) (1.2*pinnedCountBuffer[0]);
unsigned int numBlocks = context.getNumAtomBlocks();
int totalTiles = numBlocks*(numBlocks+1)/2;
if (maxTiles > totalTiles)
maxTiles = totalTiles;
interactingTiles.resize(maxTiles);
interactingAtoms.resize(HipContext::TileSize*maxTiles);
interactingAtoms.resize(HipContext::TileSize*(size_t) maxTiles);
if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles.getDevicePointer();
findInteractingBlocksArgs[6] = &interactingTiles.getDevicePointer();
......
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2019 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2011-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020-2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -27,6 +27,7 @@
#include "HipParallelKernels.h"
#include "HipKernelSources.h"
#include "openmm/common/ContextSelector.h"
using namespace OpenMM;
using namespace std;
......@@ -70,7 +71,7 @@ public:
void execute() {
// Copy coordinates over to this device and execute the kernel.
cu.setAsCurrent();
ContextSelector selector(cu);
if (cu.getContextIndex() > 0) {
hipStreamWaitEvent(cu.getCurrentStream(), event, 0);
if (!cu.getPlatformData().peerAccessSupported)
......@@ -94,13 +95,16 @@ private:
class HipParallelCalcForcesAndEnergyKernel::FinishComputationTask : public HipContext::WorkTask {
public:
FinishComputationTask(ContextImpl& context, HipContext& cu, HipCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces, bool& valid, int2& interactionCount) :
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, HipArray& contextForces,
bool& valid, int2& interactionCount, hipStream_t stream, hipEvent_t event, hipEvent_t localEvent) :
context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount) {
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount),
stream(stream), event(event), localEvent(localEvent) {
}
void execute() {
// Execute the kernel, then download forces.
ContextSelector selector(cu);
energy += kernel.finishComputation(context, includeForce, includeEnergy, groups, valid);
if (cu.getComputeForceCount() < 200) {
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
......@@ -110,13 +114,16 @@ public:
}
if (includeForce) {
if (cu.getContextIndex() > 0) {
hipEventRecord(localEvent, cu.getCurrentStream());
hipStreamWaitEvent(stream, localEvent, 0);
int numAtoms = cu.getPaddedNumAtoms();
if (cu.getPlatformData().peerAccessSupported) {
int numBytes = numAtoms*3*sizeof(long long);
int offset = (cu.getContextIndex()-1)*numBytes;
HipContext& context0 = *cu.getPlatformData().contexts[0];
CHECK_RESULT(hipMemcpy(static_cast<char*>(contextForces.getDevicePointer())+offset,
cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice), "Error copying forces");
CHECK_RESULT(hipMemcpyAsync(static_cast<char*>(contextForces.getDevicePointer())+offset,
cu.getForce().getDevicePointer(), numBytes, hipMemcpyDeviceToDevice, stream), "Error copying forces");
hipEventRecord(event, stream);
}
else
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
......@@ -140,6 +147,9 @@ private:
HipArray& contextForces;
bool& valid;
int2& interactionCount;
hipStream_t stream;
hipEvent_t event;
hipEvent_t localEvent;
};
HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, HipPlatform::PlatformData& data) :
......@@ -150,20 +160,25 @@ HipParallelCalcForcesAndEnergyKernel::HipParallelCalcForcesAndEnergyKernel(strin
}
HipParallelCalcForcesAndEnergyKernel::~HipParallelCalcForcesAndEnergyKernel() {
data.contexts[0]->setAsCurrent();
ContextSelector selector(*data.contexts[0]);
if (pinnedPositionBuffer != NULL)
hipHostFree(pinnedPositionBuffer);
if (pinnedForceBuffer != NULL)
hipHostFree(pinnedForceBuffer);
hipEventDestroy(event);
hipStreamDestroy(peerCopyStream);
for (int i = 0; i < peerCopyEvent.size(); i++)
hipEventDestroy(peerCopyEvent[i]);
for (int i = 0; i < peerCopyEventLocal.size(); i++)
hipEventDestroy(peerCopyEventLocal[i]);
for (int i = 0; i < peerCopyStream.size(); i++)
hipStreamDestroy(peerCopyStream[i]);
if (interactionCounts != NULL)
hipHostFree(interactionCounts);
}
void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
HipContext& cu = *data.contexts[0];
cu.setAsCurrent();
ContextSelector selector(cu);
hipModule_t module = cu.createModule(HipKernelSources::parallel);
sumKernel = cu.getKernel(module, "sumForces");
int numContexts = data.contexts.size();
......@@ -171,14 +186,25 @@ void HipParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
getKernel(i).initialize(system);
for (int i = 0; i < numContexts; i++)
contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(hipEventCreateWithFlags(&event, 0), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream, hipStreamNonBlocking), "Error creating stream");
CHECK_RESULT(hipEventCreateWithFlags(&event, cu.getEventFlags()), "Error creating event");
peerCopyEvent.resize(numContexts);
peerCopyEventLocal.resize(numContexts);
peerCopyStream.resize(numContexts);
for (int i = 0; i < numContexts; i++) {
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEvent[i], cu.getEventFlags()), "Error creating event");
CHECK_RESULT(hipStreamCreateWithFlags(&peerCopyStream[i], hipStreamNonBlocking), "Error creating stream");
}
for (int i = 0; i < numContexts; i++) {
HipContext& cuLocal = *data.contexts[i];
ContextSelector selectorLocal(cuLocal);
CHECK_RESULT(hipEventCreateWithFlags(&peerCopyEventLocal[i], cu.getEventFlags()), "Error creating event");
}
CHECK_RESULT(hipHostMalloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer");
}
void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
HipContext& cu = *data.contexts[0];
cu.setAsCurrent();
ContextSelector selector(cu);
if (!contextForces.isInitialized()) {
contextForces.initialize<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
CHECK_RESULT(hipHostMalloc((void**) &pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), hipHostMallocPortable), "Error allocating pinned memory");
......@@ -194,36 +220,44 @@ void HipParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context
else {
int numBytes = cu.getPosq().getSize()*cu.getPosq().getElementSize();
hipEventRecord(event, cu.getCurrentStream());
hipStreamWaitEvent(peerCopyStream, event, 0);
for (int i = 1; i < (int) data.contexts.size(); i++)
for (int i = 1; i < (int) data.contexts.size(); i++) {
hipStreamWaitEvent(peerCopyStream[i], event, 0);
CHECK_RESULT(hipMemcpyAsync(
data.contexts[i]->getPosq().getDevicePointer(),
cu.getPosq().getDevicePointer(), numBytes,
hipMemcpyDeviceToDevice, peerCopyStream), "Error copying positions");
hipEventRecord(event, peerCopyStream);
hipMemcpyDeviceToDevice, peerCopyStream[i]), "Error copying positions");
hipEventRecord(peerCopyEvent[i], peerCopyStream[i]);
}
}
for (int i = 0; i < (int) data.contexts.size(); i++) {
data.contextEnergy[i] = 0.0;
HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event, interactionCounts[i]));
hipEvent_t waitEvent = (cu.getPlatformData().peerAccessSupported ? peerCopyEvent[i] : event);
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, waitEvent, interactionCounts[i]));
}
data.syncContexts();
}
double HipParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
for (int i = 0; i < (int) data.contexts.size(); i++) {
HipContext& cu = *data.contexts[i];
ComputeContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, contextForces, valid, interactionCounts[i]));
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i],
pinnedForceBuffer, contextForces, valid, interactionCounts[i], peerCopyStream[i], peerCopyEvent[i], peerCopyEventLocal[i]));
}
data.syncContexts();
HipContext& cu = *data.contexts[0];
ContextSelector selector(cu);
if (cu.getPlatformData().peerAccessSupported)
for (int i = 1; i < data.contexts.size(); i++)
hipStreamWaitEvent(cu.getCurrentStream(), peerCopyEvent[i], 0);
double energy = 0.0;
for (int i = 0; i < (int) data.contextEnergy.size(); i++)
energy += data.contextEnergy[i];
if (includeForce && valid) {
// Sum the forces from all devices.
HipContext& cu = *data.contexts[0];
if (!cu.getPlatformData().peerAccessSupported)
contextForces.upload(pinnedForceBuffer, false);
int bufferSize = 3*cu.getPaddedNumAtoms();
......
......@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
extern __shared__ mixed tempBuffer[];
const unsigned int thread = threadIdx.x;
mixed sum = 0;
for (unsigned int index = thread; index < bufferSize; index += blockDim.x)
for (unsigned int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x)
sum += energyBuffer[index];
tempBuffer[thread] = sum;
for (int i = 1; i < workGroupSize; i *= 2) {
......@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
tempBuffer[thread] += tempBuffer[thread+i];
}
if (thread == 0)
*result = tempBuffer[0];
result[blockIdx.x] = tempBuffer[0];
}
/**
......
......@@ -41,4 +41,6 @@ OpenMM::HipPlatform platform;
/**
 * Apply optional command-line arguments as default platform properties.
 *
 * argv[1], when present, is used as the default "Precision" property and
 * argv[2], when present, is used as the default "DeviceIndex" property.
 * Further arguments are ignored.
 */
void initializeTests(int argc, char* argv[]) {
    // Property names in the order their values appear on the command line.
    static const char* const propertyNames[] = {"Precision", "DeviceIndex"};
    const int numProperties = 2;
    for (int arg = 1; arg < argc && arg <= numProperties; arg++)
        platform.setPropertyDefaultValue(propertyNames[arg-1], std::string(argv[arg]));
}
......@@ -6,8 +6,8 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Portions copyright (c) 2011-2022 Stanford University and the Authors. *
* Portions copyright (c) 2020-2022 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
* *
......@@ -38,12 +38,16 @@
#include "HipArray.h"
#include "HipContext.h"
#include "HipFFT3D.h"
#include "fftpack.h"
#include "sfmt/SFMT.h"
#include "openmm/System.h"
#include <complex>
#include <iostream>
#include <cmath>
#include <set>
#ifdef _MSC_VER
#define POCKETFFT_NO_VECTORS
#endif
#include "pocketfft_hdronly.h"
using namespace OpenMM;
using namespace std;
......@@ -67,19 +71,19 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<Real2> original(xsize*ysize*zsize);
vector<t_complex> reference(original.size());
vector<complex<double>> reference(original.size());
for (int i = 0; i < (int) original.size(); i++) {
Real2 value;
value.x = (float) genrand_real2(sfmt);
value.y = (float) genrand_real2(sfmt);
original[i] = value;
reference[i] = t_complex(value.x, value.y);
reference[i] = complex<double>(value.x, value.y);
}
for (int i = 0; i < (int) reference.size(); i++) {
if (realToComplex)
reference[i] = t_complex(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
reference[i] = complex<double>(i%2 == 0 ? original[i/2].x : original[i/2].y, 0);
else
reference[i] = t_complex(original[i].x, original[i].y);
reference[i] = complex<double>(original[i].x, original[i].y);
}
HipArray grid1(context, original.size(), sizeof(Real2), "grid1");
HipArray grid2(context, original.size(), sizeof(Real2), "grid2");
......@@ -91,19 +95,21 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize, double e
fft.execFFT(true);
vector<Real2> result;
grid2.download(result);
fftpack_t plan;
fftpack_init_3d(&plan, xsize, ysize, zsize);
fftpack_exec_3d(plan, FFTPACK_FORWARD, &reference[0], &reference[0]);
vector<size_t> shape = {(size_t) xsize, (size_t) ysize, (size_t) zsize};
vector<size_t> axes = {0, 1, 2};
vector<ptrdiff_t> stride = {(ptrdiff_t) (ysize*zsize*sizeof(complex<double>)),
(ptrdiff_t) (zsize*sizeof(complex<double>)),
(ptrdiff_t) sizeof(complex<double>)};
pocketfft::c2c(shape, stride, stride, axes, true, reference.data(), reference.data(), 1.0);
int outputZSize = (realToComplex ? zsize/2+1 : zsize);
for (int x = 0; x < xsize; x++)
for (int y = 0; y < ysize; y++)
for (int z = 0; z < outputZSize; z++) {
int index1 = x*ysize*zsize + y*zsize + z;
int index2 = x*ysize*outputZSize + y*outputZSize + z;
ASSERT_EQUAL_TOL(reference[index1].re, result[index2].x, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].im, result[index2].y, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].real(), result[index2].x, 1e-3 * eps);
ASSERT_EQUAL_TOL(reference[index1].imag(), result[index2].y, 1e-3 * eps);
}
fftpack_destroy(plan);
// Perform a backward transform and see if we get the original values.
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2020 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Nicholas Curtis *
* Contributors: *
......@@ -50,13 +50,19 @@ void testParallelComputation(NonbondedForce::NonbondedMethod method) {
vector<Vec3> positions(numParticles);
for (int i = 0; i < numParticles; i++)
positions[i] = Vec3(5*genrand_real2(sfmt), 5*genrand_real2(sfmt), 5*genrand_real2(sfmt));
force->addGlobalParameter("scale", 0.5);
for (int i = 0; i < numParticles; ++i)
for (int j = 0; j < i; ++j) {
Vec3 delta = positions[i]-positions[j];
if (delta.dot(delta) < 0.1)
if (delta.dot(delta) < 0.1) {
force->addException(i, j, 0, 1, 0);
}
else if (delta.dot(delta) < 0.2) {
int index = force->addException(i, j, 0.5, 1, 1.0);
force->addExceptionParameterOffset("scale", index, 0.5, 0.4, 0.3);
}
}
// Create two contexts, one with a single device and one with two devices.
VerletIntegrator integrator1(0.01);
......@@ -179,6 +185,7 @@ void runPlatformTests() {
testParallelComputation(NonbondedForce::NoCutoff);
testParallelComputation(NonbondedForce::Ewald);
testParallelComputation(NonbondedForce::PME);
testParallelComputation(NonbondedForce::LJPME);
testReordering();
testDeterministicForces();
if (canRunHugeTest())
......
......@@ -60,6 +60,7 @@ void testGaussian() {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0];
context.initialize();
context.setAsCurrent();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
HipArray& random = context.getIntegrationUtilities().getRandom();
context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());
......
......@@ -70,6 +70,7 @@ void verifySorting(vector<float> array, bool uniform) {
platform.getPropertyDefaultValue(HipPlatform::HipDisablePmeStream()), "false", 1, NULL);
HipContext& context = *platformData.contexts[0];
context.initialize();
context.setAsCurrent();
HipArray data(context, array.size(), 4, "sortData");
data.upload(array);
HipSort sort(context, new SortTrait(), array.size(), uniform);
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2020 Stanford University and the Authors. *
* Portions copyright (c) 2008-2021 Stanford University and the Authors. *
* Portions copyright (c) 2021 Advanced Micro Devices, Inc. *
* Authors: Peter Eastman, Mark Friedrichs *
* Contributors: *
......@@ -29,6 +29,7 @@
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "AmoebaHipKernels.h"
#include "openmm/common/ContextSelector.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/AmoebaGeneralizedKirkwoodForceImpl.h"
#include "openmm/internal/AmoebaMultipoleForceImpl.h"
......@@ -56,7 +57,7 @@ using namespace std;
* -------------------------------------------------------------------------- */
/**
 * Destroy the kernel, releasing the FFT object it owns (if one was created).
 */
HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
    // Select the context only through the scoped ContextSelector. The legacy
    // cc.setAsCurrent() call was redundant with it and is not scoped.
    // NOTE(review): ContextSelector is expected to push cc on construction and
    // pop it on destruction -- confirm against openmm/common/ContextSelector.h.
    ContextSelector selector(cc);
    if (fft != NULL)
        delete fft;
}
......@@ -64,6 +65,7 @@ HipCalcAmoebaMultipoleForceKernel::~HipCalcAmoebaMultipoleForceKernel() {
void HipCalcAmoebaMultipoleForceKernel::initialize(const System& system, const AmoebaMultipoleForce& force) {
CommonCalcAmoebaMultipoleForceKernel::initialize(system, force);
if (usePME) {
ContextSelector selector(cc);
HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2);
fft = cu.createFFT(gridSizeX, gridSizeY, gridSizeZ, false, cu.getCurrentStream(), grid1, grid2);
......@@ -79,7 +81,7 @@ void HipCalcAmoebaMultipoleForceKernel::computeFFT(bool forward) {
* -------------------------------------------------------------------------- */
HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
cc.setAsCurrent();
ContextSelector selector(cc);
if (sort != NULL)
delete sort;
if (fft != NULL)
......@@ -91,6 +93,7 @@ HipCalcHippoNonbondedForceKernel::~HipCalcHippoNonbondedForceKernel() {
void HipCalcHippoNonbondedForceKernel::initialize(const System& system, const HippoNonbondedForce& force) {
CommonCalcHippoNonbondedForceKernel::initialize(system, force);
if (usePME) {
ContextSelector selector(cc);
sort = new HipSort(cu, new SortTrait(), cc.getNumAtoms());
HipArray& grid1 = cu.unwrap(pmeGrid1);
HipArray& grid2 = cu.unwrap(pmeGrid2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment