"vscode:/vscode.git/clone" did not exist on "2a1569a7ee064aa618f4bac455fb890a86e2e21a"
Commit 669e314e authored by John Chodera (MSKCC)'s avatar John Chodera (MSKCC)
Browse files

Merge remote-tracking branch 'upstream/master' into fix-mts

parents 283b1acd 388bf972
...@@ -109,7 +109,7 @@ before_install: ...@@ -109,7 +109,7 @@ before_install:
sudo easy_install pytest; sudo easy_install pytest;
fi fi
- if [[ "$OPENCL" == "true" ]]; then - if [[ "$OPENCL" == "true" ]]; then
wget https://jenkins.choderalab.org/userContent/AMD-APP-SDKInstaller-v3.0.130.135-GA-linux64.tar.bz2; wget http://s3.amazonaws.com/omnia-ci/AMD-APP-SDKInstaller-v3.0.130.135-GA-linux64.tar.bz2;
tar -xjf AMD-APP-SDK*.tar.bz2; tar -xjf AMD-APP-SDK*.tar.bz2;
AMDAPPSDK=${HOME}/AMDAPPSDK; AMDAPPSDK=${HOME}/AMDAPPSDK;
export OPENCL_VENDOR_PATH=${AMDAPPSDK}/etc/OpenCL/vendors; export OPENCL_VENDOR_PATH=${AMDAPPSDK}/etc/OpenCL/vendors;
......
[![Build Status](https://travis-ci.org/pandegroup/openmm.svg?branch=master)](https://travis-ci.org/pandegroup/openmm)
[![Anaconda Cloud Badge](https://anaconda.org/omnia/openmm/badges/downloads.svg)](https://anaconda.org/omnia/openmm)
## OpenMM: A High Performance Molecular Dynamics Library ## OpenMM: A High Performance Molecular Dynamics Library
Introduction Introduction
...@@ -10,15 +13,3 @@ Getting Help ...@@ -10,15 +13,3 @@ Getting Help
------------ ------------
Need Help? Check out the [documentation](http://docs.openmm.org/) and [discussion forums](https://simtk.org/forums/viewforum.php?f=161). Need Help? Check out the [documentation](http://docs.openmm.org/) and [discussion forums](https://simtk.org/forums/viewforum.php?f=161).
[C++ API Reference](http://docs.openmm.org/6.3.0/api-c++/namespaceOpenMM.html)
[Python API Reference](http://docs.openmm.org/6.3.0/api-python/annotated.html)
Badges
------
* Travis CI `linux` and `osx` integration tests:
* GitHub master [![Build Status](https://travis-ci.org/pandegroup/openmm.svg?branch=master)](https://travis-ci.org/pandegroup/openmm)
* `openmm-dev` recipe [![Build Status](https://travis-ci.org/omnia-md/conda-dev-recipes.svg?branch=master)](https://travis-ci.org/omnia-md/conda-dev-recipes)
* Anaconda Cloud `openmm` conda release: [![Binstar `openmm` conda release](https://anaconda.org/omnia/openmm/badges/version.svg)](https://anaconda.org/omnia/openmm)
* Anaconda Cloud `openmm-dev` conda package: [![Binstar `openmm-dev` conda package](https://anaconda.org/omnia/openmm-dev/badges/version.svg)](https://anaconda.org/omnia/openmm-dev)
...@@ -78,8 +78,9 @@ public: ...@@ -78,8 +78,9 @@ public:
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded * @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction * @param kernel the code to evaluate the interaction
* @param forceGroup the force group in which the interaction should be calculated * @param forceGroup the force group in which the interaction should be calculated
* @param supportsPairList specifies whether this interaction can work with a neighbor list that uses a separate pair list
*/ */
void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup); void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel, int forceGroup, bool supportsPairList=false);
/** /**
* Add a per-atom parameter that the default interaction kernel may depend on. * Add a per-atom parameter that the default interaction kernel may depend on.
*/ */
...@@ -189,6 +190,12 @@ public: ...@@ -189,6 +190,12 @@ public:
CudaArray& getInteractingAtoms() { CudaArray& getInteractingAtoms() {
return *interactingAtoms; return *interactingAtoms;
} }
/**
* Get the array containing single pairs in the neighbor list.
*/
CudaArray& getSinglePairs() {
return *singlePairs;
}
/** /**
* Get the array containing exclusion flags. * Get the array containing exclusion flags.
*/ */
...@@ -270,6 +277,8 @@ private: ...@@ -270,6 +277,8 @@ private:
CudaArray* interactingTiles; CudaArray* interactingTiles;
CudaArray* interactingAtoms; CudaArray* interactingAtoms;
CudaArray* interactionCount; CudaArray* interactionCount;
CudaArray* singlePairs;
CudaArray* singlePairCount;
CudaArray* blockCenter; CudaArray* blockCenter;
CudaArray* blockBoundingBox; CudaArray* blockBoundingBox;
CudaArray* sortedBlocks; CudaArray* sortedBlocks;
...@@ -288,8 +297,8 @@ private: ...@@ -288,8 +297,8 @@ private:
std::map<int, double> groupCutoff; std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource; std::map<int, std::string> groupKernelSource;
double lastCutoff; double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList; bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList, canUsePairList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags; int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxSinglePairs, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
}; };
/** /**
......
...@@ -83,7 +83,7 @@ private: ...@@ -83,7 +83,7 @@ private:
std::vector<Kernel> kernels; std::vector<Kernel> kernels;
std::vector<long long> completionTimes; std::vector<long long> completionTimes;
std::vector<double> contextNonbondedFractions; std::vector<double> contextNonbondedFractions;
int* tileCounts; int2* interactionCounts;
CudaArray* contextForces; CudaArray* contextForces;
void* pinnedPositionBuffer; void* pinnedPositionBuffer;
long long* pinnedForceBuffer; long long* pinnedForceBuffer;
......
...@@ -130,8 +130,10 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -130,8 +130,10 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
#endif #endif
struct stat info; struct stat info;
isNvccAvailable = (res == 0 && stat(tempDir.c_str(), &info) == 0); isNvccAvailable = (res == 0 && stat(tempDir.c_str(), &info) == 0);
int cudaDriverVersion;
cuDriverGetVersion(&cudaDriverVersion);
static bool hasShownNvccWarning = false; static bool hasShownNvccWarning = false;
if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning) { if (hasCompilerKernel && !isNvccAvailable && !hasShownNvccWarning && cudaDriverVersion < 8000) {
hasShownNvccWarning = true; hasShownNvccWarning = true;
printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. "); printf("Could not find nvcc. Using runtime compiler, which may produce slower performance. ");
#ifdef WIN32 #ifdef WIN32
...@@ -208,14 +210,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -208,14 +210,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int major, minor; int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device)); CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
int numThreadBlocksPerComputeUnit = (major >= 6 ? 4 : 6); int numThreadBlocksPerComputeUnit = (major >= 6 ? 4 : 6);
#if __CUDA_API_VERSION < 7000 if (cudaDriverVersion < 7000) {
// This is a workaround to support GTX 980 with CUDA 6.5. It reports // This is a workaround to support GTX 980 with CUDA 6.5. It reports
// its compute capability as 5.2, but the compiler doesn't support // its compute capability as 5.2, but the compiler doesn't support
// anything beyond 5.0. // anything beyond 5.0.
if (major == 5) if (major == 5)
minor = 0; minor = 0;
#endif }
#if __CUDA_API_VERSION < 8000 if (cudaDriverVersion < 8000) {
// This is a workaround to support Pascal with CUDA 7.5. It reports // This is a workaround to support Pascal with CUDA 7.5. It reports
// its compute capability as 6.x, but the compiler doesn't support // its compute capability as 6.x, but the compiler doesn't support
// anything beyond 5.3. // anything beyond 5.3.
...@@ -223,7 +225,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -223,7 +225,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
major = 5; major = 5;
minor = 3; minor = 3;
} }
#endif }
gpuArchitecture = intToString(major)+intToString(minor); gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor; computeCapability = major+0.1*minor;
......
...@@ -1920,7 +1920,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1920,7 +1920,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Add the interaction to the default nonbonded kernel. // Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines); string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup()); cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ) if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon->getDevicePointer())); cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon->getDevicePointer()));
...@@ -2314,7 +2314,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2314,7 +2314,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
if (force.getNumInteractionGroups() > 0) if (force.getNumInteractionGroups() > 0)
initInteractionGroups(force, source, tableTypes); initInteractionGroups(force, source, tableTypes);
else { else {
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup()); cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory())); cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"params"+cu.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
......
...@@ -64,15 +64,16 @@ private: ...@@ -64,15 +64,16 @@ private:
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true), CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL), interactionCount(NULL), singlePairs(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), pinnedCountBuffer(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) { oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), pinnedCountBuffer(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0),
canUsePairList(true) {
// Decide how many thread blocks to use. // Decide how many thread blocks to use.
string errorMessage = "Error initializing nonbonded utilities"; string errorMessage = "Error initializing nonbonded utilities";
int multiprocessors; int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice())); CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, context.getDevice()));
CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0)); CHECK_RESULT(cuEventCreate(&downloadCountEvent, 0));
CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, sizeof(int), CU_MEMHOSTALLOC_PORTABLE)); CHECK_RESULT(cuMemHostAlloc((void**) &pinnedCountBuffer, 2*sizeof(int), CU_MEMHOSTALLOC_PORTABLE));
numForceThreadBlocks = 4*multiprocessors; numForceThreadBlocks = 4*multiprocessors;
forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256); forceThreadBlockSize = (context.getComputeCapability() < 2.0 ? 128 : 256);
} }
...@@ -92,6 +93,8 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() { ...@@ -92,6 +93,8 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete interactingAtoms; delete interactingAtoms;
if (interactionCount != NULL) if (interactionCount != NULL)
delete interactionCount; delete interactionCount;
if (singlePairs != NULL)
delete singlePairs;
if (blockCenter != NULL) if (blockCenter != NULL)
delete blockCenter; delete blockCenter;
if (blockBoundingBox != NULL) if (blockBoundingBox != NULL)
...@@ -113,7 +116,7 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() { ...@@ -113,7 +116,7 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
cuEventDestroy(downloadCountEvent); cuEventDestroy(downloadCountEvent);
} }
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) { void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup, bool supportsPairList) {
if (groupCutoff.size() > 0) { if (groupCutoff.size() > 0) {
if (usesCutoff != useCutoff) if (usesCutoff != useCutoff)
throw OpenMMException("All Forces must agree on whether to use a cutoff"); throw OpenMMException("All Forces must agree on whether to use a cutoff");
...@@ -128,6 +131,7 @@ void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, ...@@ -128,6 +131,7 @@ void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic,
usePeriodic = usesPeriodic; usePeriodic = usesPeriodic;
groupCutoff[forceGroup] = cutoffDistance; groupCutoff[forceGroup] = cutoffDistance;
groupFlags |= 1<<forceGroup; groupFlags |= 1<<forceGroup;
canUsePairList &= supportsPairList;
if (kernel.size() > 0) { if (kernel.size() > 0) {
if (groupKernelSource.find(forceGroup) == groupKernelSource.end()) if (groupKernelSource.find(forceGroup) == groupKernelSource.end())
groupKernelSource[forceGroup] = ""; groupKernelSource[forceGroup] = "";
...@@ -279,9 +283,11 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -279,9 +283,11 @@ void CudaNonbondedUtilities::initialize(const System& system) {
maxTiles = numTiles; maxTiles = numTiles;
if (maxTiles < 1) if (maxTiles < 1)
maxTiles = 1; maxTiles = 1;
maxSinglePairs = 5*numAtoms;
interactingTiles = CudaArray::create<int>(context, maxTiles, "interactingTiles"); interactingTiles = CudaArray::create<int>(context, maxTiles, "interactingTiles");
interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms");
interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount"); interactionCount = CudaArray::create<unsigned int>(context, 2, "interactionCount");
singlePairs = CudaArray::create<int2>(context, maxSinglePairs, "singlePairs");
int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
blockCenter = new CudaArray(context, numAtomBlocks, 4*elementSize, "blockCenter"); blockCenter = new CudaArray(context, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new CudaArray(context, numAtomBlocks, 4*elementSize, "blockBoundingBox"); blockBoundingBox = new CudaArray(context, numAtomBlocks, 4*elementSize, "blockBoundingBox");
...@@ -291,7 +297,7 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -291,7 +297,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
oldPositions = new CudaArray(context, numAtoms, 4*elementSize, "oldPositions"); oldPositions = new CudaArray(context, numAtoms, 4*elementSize, "oldPositions");
rebuildNeighborList = CudaArray::create<int>(context, 1, "rebuildNeighborList"); rebuildNeighborList = CudaArray::create<int>(context, 1, "rebuildNeighborList");
blockSorter = new CudaSort(context, new BlockSortTrait(context.getUseDoublePrecision()), numAtomBlocks); blockSorter = new CudaSort(context, new BlockSortTrait(context.getUseDoublePrecision()), numAtomBlocks);
vector<unsigned int> count(1, 0); vector<unsigned int> count(2, 0);
interactionCount->upload(count); interactionCount->upload(count);
} }
...@@ -316,6 +322,8 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -316,6 +322,8 @@ void CudaNonbondedUtilities::initialize(const System& system) {
forceArgs.push_back(&blockCenter->getDevicePointer()); forceArgs.push_back(&blockCenter->getDevicePointer());
forceArgs.push_back(&blockBoundingBox->getDevicePointer()); forceArgs.push_back(&blockBoundingBox->getDevicePointer());
forceArgs.push_back(&interactingAtoms->getDevicePointer()); forceArgs.push_back(&interactingAtoms->getDevicePointer());
forceArgs.push_back(&maxSinglePairs);
forceArgs.push_back(&singlePairs->getDevicePointer());
} }
for (int i = 0; i < (int) parameters.size(); i++) for (int i = 0; i < (int) parameters.size(); i++)
forceArgs.push_back(&parameters[i].getMemory()); forceArgs.push_back(&parameters[i].getMemory());
...@@ -353,8 +361,10 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -353,8 +361,10 @@ void CudaNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksArgs.push_back(&interactionCount->getDevicePointer()); findInteractingBlocksArgs.push_back(&interactionCount->getDevicePointer());
findInteractingBlocksArgs.push_back(&interactingTiles->getDevicePointer()); findInteractingBlocksArgs.push_back(&interactingTiles->getDevicePointer());
findInteractingBlocksArgs.push_back(&interactingAtoms->getDevicePointer()); findInteractingBlocksArgs.push_back(&interactingAtoms->getDevicePointer());
findInteractingBlocksArgs.push_back(&singlePairs->getDevicePointer());
findInteractingBlocksArgs.push_back(&context.getPosq().getDevicePointer()); findInteractingBlocksArgs.push_back(&context.getPosq().getDevicePointer());
findInteractingBlocksArgs.push_back(&maxTiles); findInteractingBlocksArgs.push_back(&maxTiles);
findInteractingBlocksArgs.push_back(&maxSinglePairs);
findInteractingBlocksArgs.push_back(&startBlockIndex); findInteractingBlocksArgs.push_back(&startBlockIndex);
findInteractingBlocksArgs.push_back(&numBlocks); findInteractingBlocksArgs.push_back(&numBlocks);
findInteractingBlocksArgs.push_back(&sortedBlocks->getDevicePointer()); findInteractingBlocksArgs.push_back(&sortedBlocks->getDevicePointer());
...@@ -424,12 +434,13 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo ...@@ -424,12 +434,13 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
bool CudaNonbondedUtilities::updateNeighborListSize() { bool CudaNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff) if (!useCutoff)
return false; return false;
if (pinnedCountBuffer[0] <= (unsigned int) maxTiles) if (pinnedCountBuffer[0] <= maxTiles && pinnedCountBuffer[1] <= maxSinglePairs)
return false; return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent // The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future. // this from happening in the future.
if (pinnedCountBuffer[0] > maxTiles) {
maxTiles = (int) (1.2*pinnedCountBuffer[0]); maxTiles = (int) (1.2*pinnedCountBuffer[0]);
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
if (maxTiles > totalTiles) if (maxTiles > totalTiles)
...@@ -446,6 +457,16 @@ bool CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -446,6 +457,16 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[17] = &interactingAtoms->getDevicePointer(); forceArgs[17] = &interactingAtoms->getDevicePointer();
findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer(); findInteractingBlocksArgs[7] = &interactingAtoms->getDevicePointer();
}
if (pinnedCountBuffer[1] > maxSinglePairs) {
maxSinglePairs = (int) (1.2*pinnedCountBuffer[1]);
delete singlePairs;
singlePairs = NULL; // Avoid an error in the destructor if the following allocation fails
singlePairs = CudaArray::create<int2>(context, maxSinglePairs, "singlePairs");
if (forceArgs.size() > 0)
forceArgs[19] = &singlePairs->getDevicePointer();
findInteractingBlocksArgs[8] = &singlePairs->getDevicePointer();
}
forceRebuildNeighborList = true; forceRebuildNeighborList = true;
context.setForcesValid(false); context.setForcesValid(false);
return true; return true;
...@@ -493,6 +514,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) { ...@@ -493,6 +514,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
if (usePeriodic) if (usePeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions); defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions);
defines["MAX_BITS_FOR_PAIRS"] = (canUsePairList ? "2" : "0");
CUmodule interactingBlocksProgram = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::findInteractingBlocks, defines); CUmodule interactingBlocksProgram = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::findInteractingBlocks, defines);
kernels.findBlockBoundsKernel = context.getKernel(interactingBlocksProgram, "findBlockBounds"); kernels.findBlockBoundsKernel = context.getKernel(interactingBlocksProgram, "findBlockBounds");
kernels.sortBoxDataKernel = context.getKernel(interactingBlocksProgram, "sortBoxData"); kernels.sortBoxDataKernel = context.getKernel(interactingBlocksProgram, "sortBoxData");
......
...@@ -63,8 +63,8 @@ if (result != CUDA_SUCCESS) { \ ...@@ -63,8 +63,8 @@ if (result != CUDA_SUCCESS) { \
class CudaParallelCalcForcesAndEnergyKernel::BeginComputationTask : public CudaContext::WorkTask { class CudaParallelCalcForcesAndEnergyKernel::BeginComputationTask : public CudaContext::WorkTask {
public: public:
BeginComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel, BeginComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, void* pinnedMemory, CUevent event, int& numTiles) : context(context), cu(cu), kernel(kernel), bool includeForce, bool includeEnergy, int groups, void* pinnedMemory, CUevent event, int2& interactionCount) : context(context), cu(cu), kernel(kernel),
includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), pinnedMemory(pinnedMemory), event(event), numTiles(numTiles) { includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), pinnedMemory(pinnedMemory), event(event), interactionCount(interactionCount) {
} }
void execute() { void execute() {
// Copy coordinates over to this device and execute the kernel. // Copy coordinates over to this device and execute the kernel.
...@@ -77,7 +77,7 @@ public: ...@@ -77,7 +77,7 @@ public:
} }
kernel.beginComputation(context, includeForce, includeEnergy, groups); kernel.beginComputation(context, includeForce, includeEnergy, groups);
if (cu.getNonbondedUtilities().getUsePeriodic()) if (cu.getNonbondedUtilities().getUsePeriodic())
cu.getNonbondedUtilities().getInteractionCount().download(&numTiles, false); cu.getNonbondedUtilities().getInteractionCount().download(&interactionCount, false);
} }
private: private:
ContextImpl& context; ContextImpl& context;
...@@ -87,15 +87,15 @@ private: ...@@ -87,15 +87,15 @@ private:
int groups; int groups;
void* pinnedMemory; void* pinnedMemory;
CUevent event; CUevent event;
int& numTiles; int2& interactionCount;
}; };
class CudaParallelCalcForcesAndEnergyKernel::FinishComputationTask : public CudaContext::WorkTask { class CudaParallelCalcForcesAndEnergyKernel::FinishComputationTask : public CudaContext::WorkTask {
public: public:
FinishComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel, FinishComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel,
bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, CudaArray& contextForces, bool& valid, int& numTiles) : bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, long long* pinnedMemory, CudaArray& contextForces, bool& valid, int2& interactionCount) :
context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy), context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), groups(groups), energy(energy),
completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), numTiles(numTiles) { completionTime(completionTime), pinnedMemory(pinnedMemory), contextForces(contextForces), valid(valid), interactionCount(interactionCount) {
} }
void execute() { void execute() {
// Execute the kernel, then download forces. // Execute the kernel, then download forces.
...@@ -120,7 +120,8 @@ public: ...@@ -120,7 +120,8 @@ public:
cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]); cu.getForce().download(&pinnedMemory[(cu.getContextIndex()-1)*numAtoms*3]);
} }
} }
if (cu.getNonbondedUtilities().getUsePeriodic() && numTiles > cu.getNonbondedUtilities().getInteractingTiles().getSize()) { if (cu.getNonbondedUtilities().getUsePeriodic() && (interactionCount.x > cu.getNonbondedUtilities().getInteractingTiles().getSize() ||
interactionCount.y > cu.getNonbondedUtilities().getSinglePairs().getSize())) {
valid = false; valid = false;
cu.getNonbondedUtilities().updateNeighborListSize(); cu.getNonbondedUtilities().updateNeighborListSize();
} }
...@@ -136,12 +137,12 @@ private: ...@@ -136,12 +137,12 @@ private:
long long* pinnedMemory; long long* pinnedMemory;
CudaArray& contextForces; CudaArray& contextForces;
bool& valid; bool& valid;
int& numTiles; int2& interactionCount;
}; };
CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) : CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) :
CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()), CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextNonbondedFractions(data.contexts.size()),
tileCounts(NULL), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) { interactionCounts(NULL), contextForces(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
for (int i = 0; i < (int) data.contexts.size(); i++) for (int i = 0; i < (int) data.contexts.size(); i++)
kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i]))); kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, *data.contexts[i])));
} }
...@@ -156,8 +157,8 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel() ...@@ -156,8 +157,8 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel()
cuMemFreeHost(pinnedForceBuffer); cuMemFreeHost(pinnedForceBuffer);
cuEventDestroy(event); cuEventDestroy(event);
cuStreamDestroy(peerCopyStream); cuStreamDestroy(peerCopyStream);
if (tileCounts != NULL) if (interactionCounts != NULL)
cuMemFreeHost(tileCounts); cuMemFreeHost(interactionCounts);
} }
void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) { void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
...@@ -172,7 +173,7 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) { ...@@ -172,7 +173,7 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
contextNonbondedFractions[i] = 1/(double) numContexts; contextNonbondedFractions[i] = 1/(double) numContexts;
CHECK_RESULT(cuEventCreate(&event, 0), "Error creating event"); CHECK_RESULT(cuEventCreate(&event, 0), "Error creating event");
CHECK_RESULT(cuStreamCreate(&peerCopyStream, CU_STREAM_NON_BLOCKING), "Error creating stream"); CHECK_RESULT(cuStreamCreate(&peerCopyStream, CU_STREAM_NON_BLOCKING), "Error creating stream");
CHECK_RESULT(cuMemHostAlloc((void**) &tileCounts, numContexts*sizeof(int), 0), "Error creating tile count buffer"); CHECK_RESULT(cuMemHostAlloc((void**) &interactionCounts, numContexts*sizeof(int2), 0), "Error creating interaction counts buffer");
} }
void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) { void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
...@@ -202,7 +203,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex ...@@ -202,7 +203,7 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
data.contextEnergy[i] = 0.0; data.contextEnergy[i] = 0.0;
CudaContext& cu = *data.contexts[i]; CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread(); CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event, tileCounts[i])); thread.addTask(new BeginComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer, event, interactionCounts[i]));
} }
} }
...@@ -210,7 +211,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con ...@@ -210,7 +211,7 @@ double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& con
for (int i = 0; i < (int) data.contexts.size(); i++) { for (int i = 0; i < (int) data.contexts.size(); i++) {
CudaContext& cu = *data.contexts[i]; CudaContext& cu = *data.contexts[i];
CudaContext::WorkThread& thread = cu.getWorkThread(); CudaContext::WorkThread& thread = cu.getWorkThread();
thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, *contextForces, valid, tileCounts[i])); thread.addTask(new FinishComputationTask(context, cu, getKernel(i), includeForce, includeEnergy, groups, data.contextEnergy[i], completionTimes[i], pinnedForceBuffer, *contextForces, valid, interactionCounts[i]));
} }
data.syncContexts(); data.syncContexts();
double energy = 0.0; double energy = 0.0;
......
...@@ -71,9 +71,60 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co ...@@ -71,9 +71,60 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
if (rebuild) { if (rebuild) {
rebuildNeighborList[0] = 1; rebuildNeighborList[0] = 1;
interactionCount[0] = 0; interactionCount[0] = 0;
interactionCount[1] = 0;
} }
} }
__device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsigned int maxSinglePairs, unsigned int* singlePairCount, int2* singlePairs, int* sumBuffer, volatile int& pairStartIndex) {
// Record interactions that should be computed as single pairs rather than in blocks.
const int indexInWarp = threadIdx.x%32;
int sum = 0;
for (int i = indexInWarp; i < length; i += 32) {
int count = __popc(flags[i]);
sum += (count <= MAX_BITS_FOR_PAIRS ? count : 0);
}
sumBuffer[indexInWarp] = sum;
for (int step = 1; step < 32; step *= 2) {
int add = (indexInWarp >= step ? sumBuffer[indexInWarp-step] : 0);
sumBuffer[indexInWarp] += add;
}
int pairsToStore = sumBuffer[31];
if (indexInWarp == 0)
pairStartIndex = atomicAdd(singlePairCount, pairsToStore);
int pairIndex = pairStartIndex + (indexInWarp > 0 ? sumBuffer[indexInWarp-1] : 0);
for (int i = indexInWarp; i < length; i += 32) {
int count = __popc(flags[i]);
if (count <= MAX_BITS_FOR_PAIRS && pairIndex+count < maxSinglePairs) {
int f = flags[i];
while (f != 0) {
singlePairs[pairIndex] = make_int2(atoms[i], x*TILE_SIZE+__ffs(f)-1);
f &= f-1;
pairIndex++;
}
}
}
// Compact the remaining interactions.
const int warpMask = (1<<indexInWarp)-1;
int numCompacted = 0;
for (int start = 0; start < length; start += 32) {
int i = start+indexInWarp;
int atom = atoms[i];
int flag = flags[i];
bool include = (i < length && __popc(flags[i]) > MAX_BITS_FOR_PAIRS);
int includeFlags = __ballot(include);
if (include) {
int index = numCompacted+__popc(includeFlags&warpMask);
atoms[index] = atom;
flags[index] = flag;
}
numCompacted += __popc(includeFlags);
}
return numCompacted;
}
/** /**
* Compare the bounding boxes for each pair of atom blocks (comprised of 32 atoms each), forming a tile. If the two * Compare the bounding boxes for each pair of atom blocks (comprised of 32 atoms each), forming a tile. If the two
* atom blocks are sufficiently far apart, mark them as non-interacting. There are two stages in the algorithm. * atom blocks are sufficiently far apart, mark them as non-interacting. There are two stages in the algorithm.
...@@ -124,8 +175,9 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co ...@@ -124,8 +175,9 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
* *
*/ */
extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
unsigned int* __restrict__ interactionCount, int* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq, unsigned int* __restrict__ interactionCount, int* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms,
unsigned int maxTiles, unsigned int startBlockIndex, unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter, int2* __restrict__ singlePairs, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int maxSinglePairs,
unsigned int startBlockIndex, unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter,
const real4* __restrict__ sortedBlockBoundingBox, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, const real4* __restrict__ sortedBlockBoundingBox, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices,
real4* __restrict__ oldPositions, const int* __restrict__ rebuildNeighborList) { real4* __restrict__ oldPositions, const int* __restrict__ rebuildNeighborList) {
...@@ -138,12 +190,17 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -138,12 +190,17 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
const int warpIndex = (blockIdx.x*blockDim.x+threadIdx.x)/32; const int warpIndex = (blockIdx.x*blockDim.x+threadIdx.x)/32;
const int warpMask = (1<<indexInWarp)-1; const int warpMask = (1<<indexInWarp)-1;
__shared__ int workgroupBuffer[BUFFER_SIZE*(GROUP_SIZE/32)]; __shared__ int workgroupBuffer[BUFFER_SIZE*(GROUP_SIZE/32)];
__shared__ int workgroupFlagsBuffer[BUFFER_SIZE*(GROUP_SIZE/32)];
__shared__ int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)]; __shared__ int warpExclusions[MAX_EXCLUSIONS*(GROUP_SIZE/32)];
__shared__ real3 posBuffer[GROUP_SIZE]; __shared__ real3 posBuffer[GROUP_SIZE];
__shared__ volatile int workgroupTileIndex[GROUP_SIZE/32]; __shared__ volatile int workgroupTileIndex[GROUP_SIZE/32];
__shared__ int sumBuffer[GROUP_SIZE];
__shared__ int worksgroupPairStartIndex[GROUP_SIZE/32];
int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32); int* buffer = workgroupBuffer+BUFFER_SIZE*(warpStart/32);
int* flagsBuffer = workgroupFlagsBuffer+BUFFER_SIZE*(warpStart/32);
int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32); int* exclusionsForX = warpExclusions+MAX_EXCLUSIONS*(warpStart/32);
volatile int& tileStartIndex = workgroupTileIndex[warpStart/32]; volatile int& tileStartIndex = workgroupTileIndex[warpStart/32];
volatile int& pairStartIndex = worksgroupPairStartIndex[warpStart/32];
// Loop over blocks. // Loop over blocks.
...@@ -227,7 +284,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -227,7 +284,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
APPLY_PERIODIC_TO_DELTA(atomDelta) APPLY_PERIODIC_TO_DELTA(atomDelta)
#endif #endif
int atomFlags = ballot(atomDelta.x*atomDelta.x+atomDelta.y*atomDelta.y+atomDelta.z*atomDelta.z < (PADDED_CUTOFF+blockCenterY.w)*(PADDED_CUTOFF+blockCenterY.w)); int atomFlags = ballot(atomDelta.x*atomDelta.x+atomDelta.y*atomDelta.y+atomDelta.z*atomDelta.z < (PADDED_CUTOFF+blockCenterY.w)*(PADDED_CUTOFF+blockCenterY.w));
bool interacts = false; int interacts = 0;
if (atom2 < NUM_ATOMS && atomFlags != 0) { if (atom2 < NUM_ATOMS && atomFlags != 0) {
int first = __ffs(atomFlags)-1; int first = __ffs(atomFlags)-1;
int last = 32-__clz(atomFlags); int last = 32-__clz(atomFlags);
...@@ -236,14 +293,14 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -236,14 +293,14 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
for (int j = first; j < last; j++) { for (int j = first; j < last; j++) {
real3 delta = pos2-posBuffer[warpStart+j]; real3 delta = pos2-posBuffer[warpStart+j];
APPLY_PERIODIC_TO_DELTA(delta) APPLY_PERIODIC_TO_DELTA(delta)
interacts |= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED); interacts |= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED ? 1<<j : 0);
} }
} }
else { else {
#endif #endif
for (int j = first; j < last; j++) { for (int j = first; j < last; j++) {
real3 delta = pos2-posBuffer[warpStart+j]; real3 delta = pos2-posBuffer[warpStart+j];
interacts |= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED); interacts |= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED ? 1<<j : 0);
} }
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
} }
...@@ -253,15 +310,22 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -253,15 +310,22 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
// Add any interacting atoms to the buffer. // Add any interacting atoms to the buffer.
int includeAtomFlags = __ballot(interacts); int includeAtomFlags = __ballot(interacts);
if (interacts) if (interacts) {
buffer[neighborsInBuffer+__popc(includeAtomFlags&warpMask)] = atom2; int index = neighborsInBuffer+__popc(includeAtomFlags&warpMask);
buffer[index] = atom2;
flagsBuffer[index] = interacts;
}
neighborsInBuffer += __popc(includeAtomFlags); neighborsInBuffer += __popc(includeAtomFlags);
if (neighborsInBuffer > BUFFER_SIZE-TILE_SIZE) { if (neighborsInBuffer > BUFFER_SIZE-TILE_SIZE) {
// Store the new tiles to memory. // Store the new tiles to memory.
#if MAX_BITS_FOR_PAIRS > 0
neighborsInBuffer = saveSinglePairs(x, buffer, flagsBuffer, neighborsInBuffer, maxSinglePairs, &interactionCount[1], singlePairs, sumBuffer+warpStart, pairStartIndex);
#endif
int tilesToStore = neighborsInBuffer/TILE_SIZE; int tilesToStore = neighborsInBuffer/TILE_SIZE;
if (tilesToStore > 0) {
if (indexInWarp == 0) if (indexInWarp == 0)
tileStartIndex = atomicAdd(interactionCount, tilesToStore); tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore);
int newTileStartIndex = tileStartIndex; int newTileStartIndex = tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) { if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore) if (indexInWarp < tilesToStore)
...@@ -274,13 +338,18 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -274,13 +338,18 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
} }
} }
} }
}
// If we have a partially filled buffer, store it to memory. // If we have a partially filled buffer, store it to memory.
#if MAX_BITS_FOR_PAIRS > 0
if (neighborsInBuffer > 32)
neighborsInBuffer = saveSinglePairs(x, buffer, flagsBuffer, neighborsInBuffer, maxSinglePairs, &interactionCount[1], singlePairs, sumBuffer+warpStart, pairStartIndex);
#endif
if (neighborsInBuffer > 0) { if (neighborsInBuffer > 0) {
int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE; int tilesToStore = (neighborsInBuffer+TILE_SIZE-1)/TILE_SIZE;
if (indexInWarp == 0) if (indexInWarp == 0)
tileStartIndex = atomicAdd(interactionCount, tilesToStore); tileStartIndex = atomicAdd(&interactionCount[0], tilesToStore);
int newTileStartIndex = tileStartIndex; int newTileStartIndex = tileStartIndex;
if (newTileStartIndex+tilesToStore <= maxTiles) { if (newTileStartIndex+tilesToStore <= maxTiles) {
if (indexInWarp < tilesToStore) if (indexInWarp < tilesToStore)
......
...@@ -103,9 +103,10 @@ extern "C" __global__ void computeNonbonded( ...@@ -103,9 +103,10 @@ extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions, unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount,real4 periodicBoxSize, real4 invPeriodicBoxSize, , const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms, unsigned int maxSinglePairs,
const int2* __restrict__ singlePairs
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
...@@ -278,10 +279,10 @@ extern "C" __global__ void computeNonbonded( ...@@ -278,10 +279,10 @@ extern "C" __global__ void computeNonbonded(
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#endif // end USE_SYMMETRIC #endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA SHUFFLE_WARP_DATA
#endif #endif
#endif
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
excl >>= 1; excl >>= 1;
#endif #endif
...@@ -484,9 +485,9 @@ extern "C" __global__ void computeNonbonded( ...@@ -484,9 +485,9 @@ extern "C" __global__ void computeNonbonded(
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#endif // end USE_SYMMETRIC #endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA SHUFFLE_WARP_DATA
#endif
#endif #endif
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
...@@ -555,9 +556,9 @@ extern "C" __global__ void computeNonbonded( ...@@ -555,9 +556,9 @@ extern "C" __global__ void computeNonbonded(
localData[tbx+tj].fz += dEdR2.z; localData[tbx+tj].fz += dEdR2.z;
#endif #endif
#endif // end USE_SYMMETRIC #endif // end USE_SYMMETRIC
#endif
#ifdef ENABLE_SHUFFLE #ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA SHUFFLE_WARP_DATA
#endif
#endif #endif
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
...@@ -588,6 +589,59 @@ extern "C" __global__ void computeNonbonded( ...@@ -588,6 +589,59 @@ extern "C" __global__ void computeNonbonded(
} }
pos++; pos++;
} }
// Third loop: single pairs that aren't part of a tile.
#if USE_CUTOFF
const unsigned int numPairs = interactionCount[1];
if (numPairs > maxSinglePairs)
return; // There wasn't enough memory for the neighbor list.
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numPairs; i += blockDim.x*gridDim.x) {
int2 pair = singlePairs[i];
int atom1 = pair.x;
int atom2 = pair.y;
real4 posq1 = posq[atom1];
real4 posq2 = posq[atom2];
LOAD_ATOM1_PARAMETERS
int j = atom2;
atom2 = threadIdx.x;
DECLARE_LOCAL_PARAMETERS
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_ATOM2_PARAMETERS
atom2 = pair.y;
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real r = r2*invR;
#ifdef USE_SYMMETRIC
real dEdR = 0.0f;
#else
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
#endif
bool hasExclusions = false;
bool isExcluded = false;
real tempEnergy = 0.0f;
const real interactionScale = 1.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
real3 dEdR1 = delta*dEdR;
real3 dEdR2 = -dEdR1;
#endif
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (-dEdR1.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-dEdR1.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-dEdR1.z*0x100000000)));
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (-dEdR2.x*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-dEdR2.y*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-dEdR2.z*0x100000000)));
#endif
}
#endif
#ifdef INCLUDE_ENERGY #ifdef INCLUDE_ENERGY
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
#endif #endif
......
...@@ -266,11 +266,11 @@ void ReferenceVariableStochasticDynamics::update(const OpenMM::System& system, v ...@@ -266,11 +266,11 @@ void ReferenceVariableStochasticDynamics::update(const OpenMM::System& system, v
// copy xPrime -> atomCoordinates // copy xPrime -> atomCoordinates
RealOpenMM invStepSize = 1.0/getDeltaT();
for (int ii = 0; ii < numberOfAtoms; ii++) { for (int ii = 0; ii < numberOfAtoms; ii++) {
if (masses[ii] != 0.0) { if (masses[ii] != 0.0) {
atomCoordinates[ii][0] = xPrime[ii][0]; velocities[ii] = (xPrime[ii]-atomCoordinates[ii])*invStepSize;
atomCoordinates[ii][1] = xPrime[ii][1]; atomCoordinates[ii] = xPrime[ii];
atomCoordinates[ii][2] = xPrime[ii][2];
} }
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
int getForces, int getForces,
int getEnergy, int getEnergy,
int getParameters, int getParameters,
int getParameterDerivatives,
int enforcePeriodic, int enforcePeriodic,
bitmask32t groups) { bitmask32t groups) {
State state; State state;
...@@ -38,6 +39,7 @@ ...@@ -38,6 +39,7 @@
if (getForces) types |= State::Forces; if (getForces) types |= State::Forces;
if (getEnergy) types |= State::Energy; if (getEnergy) types |= State::Energy;
if (getParameters) types |= State::Parameters; if (getParameters) types |= State::Parameters;
if (getParameterDerivatives) types |= State::ParameterDerivatives;
try { try {
state = self->getState(types, enforcePeriodic, groups); state = self->getState(types, enforcePeriodic, groups);
} }
...@@ -53,7 +55,7 @@ ...@@ -53,7 +55,7 @@
%pythoncode %{ %pythoncode %{
def getState(self, getPositions=False, getVelocities=False, def getState(self, getPositions=False, getVelocities=False,
getForces=False, getEnergy=False, getParameters=False, getForces=False, getEnergy=False, getParameters=False,
enforcePeriodicBox=False, groups=-1): getParameterDerivatives=False, enforcePeriodicBox=False, groups=-1):
"""Get a State object recording the current state information stored in this context. """Get a State object recording the current state information stored in this context.
Parameters Parameters
...@@ -66,8 +68,10 @@ ...@@ -66,8 +68,10 @@
whether to store the forces acting on particles in the State whether to store the forces acting on particles in the State
getEnergy : bool=False getEnergy : bool=False
whether to store potential and kinetic energy in the State whether to store potential and kinetic energy in the State
getParameter : bool=False getParameters : bool=False
whether to store context parameters in the State whether to store context parameters in the State
getParameterDerivatives : bool=False
whether to store parameter derivatives in the State
enforcePeriodicBox : bool=False enforcePeriodicBox : bool=False
if false, the position of each particle will be whatever position if false, the position of each particle will be whatever position
is stored in the Context, regardless of periodic boundary conditions. is stored in the Context, regardless of periodic boundary conditions.
...@@ -79,9 +83,9 @@ ...@@ -79,9 +83,9 @@
can also be passed as an unsigned integer interpreted as a bitmask, can also be passed as an unsigned integer interpreted as a bitmask,
in which case group i will be included if (groups&(1<<i)) != 0. in which case group i will be included if (groups&(1<<i)) != 0.
""" """
getP, getV, getF, getE, getPa, enforcePeriodic = map(bool, getP, getV, getF, getE, getPa, getPd, enforcePeriodic = map(bool,
(getPositions, getVelocities, getForces, getEnergy, getParameters, (getPositions, getVelocities, getForces, getEnergy, getParameters,
enforcePeriodicBox)) getParameterDerivatives, enforcePeriodicBox))
try: try:
# is the input integer-like? # is the input integer-like?
...@@ -95,8 +99,8 @@ ...@@ -95,8 +99,8 @@
raise TypeError('%s is neither an int nor set' % groups) raise TypeError('%s is neither an int nor set' % groups)
(simTime, periodicBoxVectorsList, energy, coordList, velList, (simTime, periodicBoxVectorsList, energy, coordList, velList,
forceList, paramMap) = \ forceList, paramMap, paramDerivMap) = \
self._getStateAsLists(getP, getV, getF, getE, getPa, enforcePeriodic, groups_mask) self._getStateAsLists(getP, getV, getF, getE, getPa, getPd, enforcePeriodic, groups_mask)
state = State(simTime=simTime, state = State(simTime=simTime,
energy=energy, energy=energy,
...@@ -104,7 +108,8 @@ ...@@ -104,7 +108,8 @@
velList=velList, velList=velList,
forceList=forceList, forceList=forceList,
periodicBoxVectorsList=periodicBoxVectorsList, periodicBoxVectorsList=periodicBoxVectorsList,
paramMap=paramMap) paramMap=paramMap,
paramDerivMap=paramDerivMap)
return state return state
def setState(self, state): def setState(self, state):
...@@ -176,6 +181,7 @@ Parameters: ...@@ -176,6 +181,7 @@ Parameters:
int getForces, int getForces,
int getEnergy, int getEnergy,
int getParameters, int getParameters,
int getParameterDerivatives,
int enforcePeriodic, int enforcePeriodic,
int groups) { int groups) {
State state; State state;
...@@ -186,6 +192,7 @@ Parameters: ...@@ -186,6 +192,7 @@ Parameters:
if (getForces) types |= State::Forces; if (getForces) types |= State::Forces;
if (getEnergy) types |= State::Energy; if (getEnergy) types |= State::Energy;
if (getParameters) types |= State::Parameters; if (getParameters) types |= State::Parameters;
if (getParameterDerivatives) types |= State::ParameterDerivatives;
try { try {
state = self->getState(copy, types, enforcePeriodic, groups); state = self->getState(copy, types, enforcePeriodic, groups);
} }
...@@ -206,6 +213,7 @@ Parameters: ...@@ -206,6 +213,7 @@ Parameters:
getForces=False, getForces=False,
getEnergy=False, getEnergy=False,
getParameters=False, getParameters=False,
getParameterDerivatives=False,
enforcePeriodicBox=False, enforcePeriodicBox=False,
groups=-1): groups=-1):
"""Get a State object recording the current state information about one copy of the system. """Get a State object recording the current state information about one copy of the system.
...@@ -222,8 +230,10 @@ Parameters: ...@@ -222,8 +230,10 @@ Parameters:
whether to store the forces acting on particles in the State whether to store the forces acting on particles in the State
getEnergy : bool=False getEnergy : bool=False
whether to store potential and kinetic energy in the State whether to store potential and kinetic energy in the State
getParameter : bool=False getParameters : bool=False
whether to store context parameters in the State whether to store context parameters in the State
getParameterDerivatives : bool=False
whether to store parameter derivatives in the State
enforcePeriodicBox : bool=False enforcePeriodicBox : bool=False
if false, the position of each particle will be whatever position if false, the position of each particle will be whatever position
is stored in the Context, regardless of periodic boundary conditions. is stored in the Context, regardless of periodic boundary conditions.
...@@ -235,9 +245,9 @@ Parameters: ...@@ -235,9 +245,9 @@ Parameters:
can also be passed as an unsigned integer interpreted as a bitmask, can also be passed as an unsigned integer interpreted as a bitmask,
in which case group i will be included if (groups&(1<<i)) != 0. in which case group i will be included if (groups&(1<<i)) != 0.
""" """
getP, getV, getF, getE, getPa, enforcePeriodic = map(bool, getP, getV, getF, getE, getPa, getPd, enforcePeriodic = map(bool,
(getPositions, getVelocities, getForces, getEnergy, getParameters, (getPositions, getVelocities, getForces, getEnergy, getParameters,
enforcePeriodicBox)) getParameterDerivatives, enforcePeriodicBox))
try: try:
# is the input integer-like? # is the input integer-like?
...@@ -250,8 +260,8 @@ Parameters: ...@@ -250,8 +260,8 @@ Parameters:
raise TypeError('%s is neither an int nor set' % groups) raise TypeError('%s is neither an int nor set' % groups)
(simTime, periodicBoxVectorsList, energy, coordList, velList, (simTime, periodicBoxVectorsList, energy, coordList, velList,
forceList, paramMap) = \ forceList, paramMap, paramDerivMap) = \
self._getStateAsLists(copy, getP, getV, getF, getE, getPa, enforcePeriodic, groups_mask) self._getStateAsLists(getP, getV, getF, getE, getPa, getPd, enforcePeriodic, groups_mask)
state = State(simTime=simTime, state = State(simTime=simTime,
energy=energy, energy=energy,
...@@ -259,7 +269,8 @@ Parameters: ...@@ -259,7 +269,8 @@ Parameters:
velList=velList, velList=velList,
forceList=forceList, forceList=forceList,
periodicBoxVectorsList=periodicBoxVectorsList, periodicBoxVectorsList=periodicBoxVectorsList,
paramMap=paramMap) paramMap=paramMap,
paramDerivMap=paramDerivMap)
return state return state
%} %}
} }
...@@ -361,8 +372,9 @@ Parameters: ...@@ -361,8 +372,9 @@ Parameters:
double time, double time,
const std::vector<Vec3>& boxVectors, const std::vector<Vec3>& boxVectors,
const std::map<string, double>& params, const std::map<string, double>& params,
const std::map<string, double>& paramDerivs,
int types) { int types) {
OpenMM::State myState = _convertListsToState(pos,vel,forces,kineticEnergy,potentialEnergy,time,boxVectors,params,types); OpenMM::State myState = _convertListsToState(pos,vel,forces,kineticEnergy,potentialEnergy,time,boxVectors,params,paramDerivs,types);
std::stringstream buffer; std::stringstream buffer;
OpenMM::XmlSerializer::serialize<OpenMM::State>(&myState, "State", buffer); OpenMM::XmlSerializer::serialize<OpenMM::State>(&myState, "State", buffer);
return buffer.str(); return buffer.str();
...@@ -386,6 +398,7 @@ Parameters: ...@@ -386,6 +398,7 @@ Parameters:
kineticEnergy = 0.0 kineticEnergy = 0.0
potentialEnergy = 0.0 potentialEnergy = 0.0
params = {} params = {}
paramDerivs = {}
types = 0 types = 0
try: try:
positions = pythonState.getPositions().value_in_unit(unit.nanometers) positions = pythonState.getPositions().value_in_unit(unit.nanometers)
...@@ -413,16 +426,21 @@ Parameters: ...@@ -413,16 +426,21 @@ Parameters:
types |= 16 types |= 16
except: except:
pass pass
try:
params = pythonState.getEnergyParameterDerivatives()
types |= 32
except:
pass
time = pythonState.getTime().value_in_unit(unit.picoseconds) time = pythonState.getTime().value_in_unit(unit.picoseconds)
boxVectors = pythonState.getPeriodicBoxVectors().value_in_unit(unit.nanometers) boxVectors = pythonState.getPeriodicBoxVectors().value_in_unit(unit.nanometers)
string = XmlSerializer._serializeStateAsLists(positions, velocities, forces, kineticEnergy, potentialEnergy, time, boxVectors, params, types) string = XmlSerializer._serializeStateAsLists(positions, velocities, forces, kineticEnergy, potentialEnergy, time, boxVectors, params, paramDerivs, types)
return string return string
@staticmethod @staticmethod
def _deserializeState(pythonString): def _deserializeState(pythonString):
(simTime, periodicBoxVectorsList, energy, coordList, velList, (simTime, periodicBoxVectorsList, energy, coordList, velList,
forceList, paramMap) = XmlSerializer._deserializeStringIntoLists(pythonString) forceList, paramMap, paramDerivMap) = XmlSerializer._deserializeStringIntoLists(pythonString)
state = State(simTime=simTime, state = State(simTime=simTime,
energy=energy, energy=energy,
...@@ -430,7 +448,8 @@ Parameters: ...@@ -430,7 +448,8 @@ Parameters:
velList=velList, velList=velList,
forceList=forceList, forceList=forceList,
periodicBoxVectorsList=periodicBoxVectorsList, periodicBoxVectorsList=periodicBoxVectorsList,
paramMap=paramMap) paramMap=paramMap,
paramDerivMap=paramDerivMap)
return state return state
@staticmethod @staticmethod
......
...@@ -28,6 +28,7 @@ State _convertListsToState( const std::vector<Vec3> &pos, ...@@ -28,6 +28,7 @@ State _convertListsToState( const std::vector<Vec3> &pos,
double time, double time,
const std::vector<Vec3> &boxVectors, const std::vector<Vec3> &boxVectors,
const std::map<std::string, double> &params, const std::map<std::string, double> &params,
const std::map<std::string, double> &paramDerivs,
int types ) { int types ) {
State::StateBuilder sb(time); State::StateBuilder sb(time);
if(types & State::Positions) if(types & State::Positions)
...@@ -40,6 +41,8 @@ State _convertListsToState( const std::vector<Vec3> &pos, ...@@ -40,6 +41,8 @@ State _convertListsToState( const std::vector<Vec3> &pos,
sb.setEnergy(kineticEnergy, potentialEnergy); sb.setEnergy(kineticEnergy, potentialEnergy);
if(types & State::Parameters) if(types & State::Parameters)
sb.setParameters(params); sb.setParameters(params);
if(types & State::ParameterDerivatives)
sb.setEnergyParameterDerivatives(paramDerivs);
sb.setPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]); sb.setPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
return sb.getState(); return sb.getState();
} }
...@@ -53,6 +56,7 @@ PyObject *_convertStateToLists(const State& state) { ...@@ -53,6 +56,7 @@ PyObject *_convertStateToLists(const State& state) {
PyObject *pForces; PyObject *pForces;
PyObject *pyTuple; PyObject *pyTuple;
PyObject *pParameters; PyObject *pParameters;
PyObject *pParameterDerivs;
simTime=state.getTime(); simTime=state.getTime();
OpenMM::Vec3 myVecA; OpenMM::Vec3 myVecA;
...@@ -112,11 +116,21 @@ PyObject *_convertStateToLists(const State& state) { ...@@ -112,11 +116,21 @@ PyObject *_convertStateToLists(const State& state) {
pParameters = Py_None; pParameters = Py_None;
Py_INCREF(Py_None); Py_INCREF(Py_None);
} }
try {
pParameterDerivs = PyDict_New();
const std::map<std::string, double>& params = state.getEnergyParameterDerivatives();
for (std::map<std::string, double>::const_iterator iter = params.begin(); iter != params.end(); ++iter)
PyDict_SetItemString(pParameterDerivs, iter->first.c_str(), Py_BuildValue("d", iter->second));
}
catch (std::exception& ex) {
pParameterDerivs = Py_None;
Py_INCREF(Py_None);
}
pyTuple=Py_BuildValue("(d,N,N,N,N,N,N)", pyTuple=Py_BuildValue("(d,N,N,N,N,N,N,N)",
simTime, pPeriodicBoxVectorsList, pEnergy, simTime, pPeriodicBoxVectorsList, pEnergy,
pPositions, pVelocities, pPositions, pVelocities,
pForces, pParameters); pForces, pParameters, pParameterDerivs);
return pyTuple; return pyTuple;
} }
......
...@@ -63,7 +63,8 @@ class State(_object): ...@@ -63,7 +63,8 @@ class State(_object):
velList=None, velList=None,
forceList=None, forceList=None,
periodicBoxVectorsList=None, periodicBoxVectorsList=None,
paramMap=None): paramMap=None,
paramDerivMap=None):
self._simTime=simTime self._simTime=simTime
self._periodicBoxVectorsList=periodicBoxVectorsList self._periodicBoxVectorsList=periodicBoxVectorsList
self._periodicBoxVectorsListNumpy=None self._periodicBoxVectorsListNumpy=None
...@@ -80,6 +81,7 @@ class State(_object): ...@@ -80,6 +81,7 @@ class State(_object):
self._forceList=forceList self._forceList=forceList
self._forceListNumpy=None self._forceListNumpy=None
self._paramMap=paramMap self._paramMap=paramMap
self._paramDerivMap=paramDerivMap
def __getstate__(self): def __getstate__(self):
serializationString = XmlSerializer.serialize(self) serializationString = XmlSerializer.serialize(self)
...@@ -221,6 +223,18 @@ class State(_object): ...@@ -221,6 +223,18 @@ class State(_object):
raise TypeError('Parameters were not requested in getState() call, so are not available.') raise TypeError('Parameters were not requested in getState() call, so are not available.')
return self._paramMap return self._paramMap
def getEnergyParameterDerivatives(self):
"""Get a map containing derivatives of the potential energy with respect to context parameters.
In most cases derivatives are only calculated if the corresponding Force objects have been
specifically told to compute them. Otherwise, the values in the map will be zero. Likewise,
if multiple Forces depend on the same parameter but only some have been told to compute
derivatives with respect to it, the returned value will include only the contributions from
the Forces that were told to compute it."""
if self._paramDerivMap is None:
raise TypeError('Parameter derivatives were not requested in getState() call, so are not available.')
return self._paramDerivMap
%} %}
%pythonappend OpenMM::Context::Context %{ %pythonappend OpenMM::Context::Context %{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment