Commit 1ebe88ba authored by Robert McGibbon's avatar Robert McGibbon
Browse files

merge

parents 804cbb22 a37dbc96
cmake_modules
CMakeLists.txt
docs-source
examples
libraries
olla
openmmapi
platforms
plugins
serialization
tests
wrappers
openmm/cmake_modules
openmm/CMakeLists.txt
openmm/docs-source
install/docs
openmm/examples
openmm/libraries
openmm/olla
openmm/openmmapi
openmm/platforms
openmm/plugins
openmm/serialization
openmm/tests
openmm/wrappers
......@@ -24,8 +24,8 @@ CMAKE_FLAGS+=" -DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-7.0/bin/nvcc"
CMAKE_FLAGS+=" -DCUDA_SDK_ROOT_DIR=/usr/local/cuda-7.0/"
CMAKE_FLAGS+=" -DCUDA_TOOLKIT_INCLUDE=/usr/local/cuda-7.0/include"
CMAKE_FLAGS+=" -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-7.0/"
CMAKE_FLAGS+=" -DOPENCL_INCLUDE_DIR=/usr/local/cuda-7.0/include"
CMAKE_FLAGS+=" -DOPENCL_LIBRARY=/usr/local/cuda-7.0/lib64/libOpenCL.so"
CMAKE_FLAGS+=" -DOPENCL_INCLUDE_DIR=/opt/AMDAPPSDK-2.9-1/include/"
CMAKE_FLAGS+=" -DOPENCL_LIBRARY=/opt/AMDAPPSDK-2.9-1/lib/x86_64/libOpenCL.so"
# Set location for FFTW3
PREFIX="$WORKSPACE/miniconda"
......
......@@ -6,7 +6,7 @@
export WORKSPACE=`pwd`
# Install miniconda
export VERSION="3.7.0"
export VERSION="Latest"
export PLATFORM="Linux"
export ARCH="x86_64"
export MINICONDA="Miniconda-$VERSION-$PLATFORM-$ARCH.sh"
......
......@@ -6,7 +6,7 @@
export WORKSPACE=`pwd`
# Install miniconda
export VERSION="3.7.0"
export VERSION="Latest"
export PLATFORM="MacOSX"
export ARCH="x86_64"
export MINICONDA="Miniconda-$VERSION-$PLATFORM-$ARCH.sh"
......
#!/bin/bash
# Build script for Linux distribution, for use in automated packaging.
# Note that this must be run from outside the checked-out openmm/ directory.
# Set relative workspace path.
export WORKSPACE=`pwd`
# Add conda binaries to path.
PATH=$WORKSPACE/miniconda/bin:$PATH
INSTALL=`pwd`/install
if [ -e $INSTALL ]; then
rm -rf $INSTALL
fi
CMAKE_FLAGS="-DCMAKE_INSTALL_PREFIX=$INSTALL"
# setting the rpath so that libOpenMMPME.so finds the right libfftw3
#CMAKE_FLAGS+=" -DCMAKE_INSTALL_RPATH=.."
CMAKE_FLAGS+=" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++"
CMAKE_FLAGS+=" -DOPENMM_BUILD_AMOEBA_CUDA_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_CPU_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_CUDA_COMPILER_PLUGIN=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_CUDA_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_DRUDE_CUDA_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_DRUDE_OPENCL_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_OPENCL_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_RPMD_CUDA_LIB=OFF"
CMAKE_FLAGS+=" -DOPENMM_BUILD_RPMD_OPENCL_LIB=OFF"
# Set location for FFTW3
#PREFIX="$WORKSPACE/miniconda"
#CMAKE_FLAGS+=" -DFFTW_INCLUDES=$PREFIX/include"
#CMAKE_FLAGS+=" -DFFTW_LIBRARY=$PREFIX/lib/libfftw3f.so"
#CMAKE_FLAGS+=" -DFFTW_THREADS_LIBRARY=$PREFIX/lib/libfftw3f_threads.so"
# Build in subdirectory.
if [ -e build ]; then
rm -rf build
fi
mkdir build
cd build
cmake ../openmm $CMAKE_FLAGS
make -j4 all DoxygenApiDocs sphinxpdf
# Install.
make install
#!/bin/bash
# Packaging script for Linux distribution, for use in automated packaging.
# Note that this must be run from outside the checked-out openmm/ directory.
# CONFIGURE HERE
export PACKAGE_DIR="packaging" # directory to stuff packaged source distribution
export VERSION=$(sed -nr "s/OPENMM_VERSION:STRING=(.*)/\1/p" build/CMakeCache.txt)
export PACKAGE_SUBDIR="OpenMM-${VERSION}-Source" # directory where distribution will be unpacked
export DISTRO_PREFIX="OpenMM-${VERSION}-Source" # prefix for source distribution (e.g. ${DISTRIBUTION_NAME}.zip)
# Perform all work in a work directory.
cd work
# Clean up.
rm -rf $PACKAGE_DIR
# Make a directory to contain packaged source distribution
mkdir $PACKAGE_DIR
mkdir $PACKAGE_DIR/$PACKAGE_SUBDIR
for filename in $( cat openmm/devtools/packaging/manifests/source/manifest.txt ); do
CMD="cp -r $filename $PACKAGE_DIR/$PACKAGE_SUBDIR"
echo $CMD
`$CMD`
done
# Add the install.sh script
#CMD="cp -r openmm/devtools/packaging/install.sh $PACKAGE_DIR/$PACKAGE_SUBDIR"
#echo $CMD
#`$CMD`
# Make Python source distribution.
echo "Building Python source distribution..."
pushd .
cd build
make PythonSdist
cd python/dist
tar zxf OpenMM-${VERSION}.tar.gz
mv OpenMM-${VERSION} python
popd
cp -r build/python/dist/python $PACKAGE_DIR/$PACKAGE_SUBDIR
# Create archives.
cd $PACKAGE_DIR
mkdir compressed
tar zcf compressed/${DISTRO_PREFIX}.tgz $PACKAGE_SUBDIR
zip -r compressed/${DISTRO_PREFIX}.zip $PACKAGE_SUBDIR
cd ..
#!/bin/tcsh
# Prepare for build by ensuring necessary prerequisites are locally installed.
# Set relative workspace path.
export WORKSPACE=`pwd`
# Install miniconda
export VERSION="Latest"
export PLATFORM="Linux"
export ARCH="x86_64"
export MINICONDA="Miniconda-$VERSION-$PLATFORM-$ARCH.sh"
if [ -f miniconda ];
then
echo "miniconda already exists"
else
echo "Downloading miniconda..."
rm -rf Miniconda-*
wget --quiet http://repo.continuum.io/miniconda/${MINICONDA}
bash ${MINICONDA} -b -p miniconda
PIP_ARGS="-U"
fi
# Add to path.
export PATH=$WORKSPACE/miniconda/bin:$PATH
# Ensure configuration is up to date.
conda config --add channels http://conda.binstar.org/omnia
conda install --yes --quiet swig fftw3f pip
pip install sphinxcontrib-bibtex
......@@ -92,6 +92,7 @@ EXCLUDE_SYMLINKS = NO
EXCLUDE_PATTERNS = */tests/* \
*/openmmapi/src/* \
*/.svn/* \
*/internal/* \
*/olla/include/openmm/kernels.h \
*/DrudeKernels.h \
*/RpmdKernels.h \
......
......@@ -20,6 +20,23 @@ ENDIF(BUILD_TESTING)
# this CMakeLists file rather than letting CMake visit them as SUBDIRS.
SET(OPENMM_SOURCE_SUBDIRS .)
IF(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_COMPILER_IS_CLANGXX 1)
ENDIF(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
IF(CMAKE_COMPILER_IS_CLANGXX)
EXECUTE_PROCESS( COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE __clang_full_version_string )
string (REGEX REPLACE ".*clang version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${__clang_full_version_string})
unset(__clang_full_version_string)
ENDIF(CMAKE_COMPILER_IS_CLANGXX)
IF(CMAKE_COMPILER_IS_CLANGXX AND CLANG_VERSION_STRING STREQUAL "3.6")
message(FATAL_ERROR "The OpenMM CPU platform cannot be built with your current compiler, clang-3.6, due to bugs \
in the compiler's AVX support. Either downgrade to clang-3.5, upgrade to clang-3.7 or later, switch to an \
alternative compiler such as GCC, or turn off building of the CPU platform by unsetting the CMake variable \
OPENMM_BUILD_CPU_LIB.")
ENDIF(CMAKE_COMPILER_IS_CLANGXX AND CLANG_VERSION_STRING STREQUAL "3.6")
# Collect up information about the version of the OpenMM library we're building
# and make it available to the code so it can be built into the binaries.
......
......@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -900,6 +900,62 @@ void testInteractionGroupLongRangeCorrection() {
ASSERT_EQUAL_TOL(expected, energy2-energy1, 1e-4);
}
void testMultipleCutoffs() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
// Add multiple nonbonded forces that have different cutoffs.
CustomNonbondedForce* nonbonded1 = new CustomNonbondedForce("2*r");
nonbonded1->addParticle(vector<double>());
nonbonded1->addParticle(vector<double>());
nonbonded1->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
nonbonded1->setCutoffDistance(2.5);
system.addForce(nonbonded1);
CustomNonbondedForce* nonbonded2 = new CustomNonbondedForce("3*r");
nonbonded2->addParticle(vector<double>());
nonbonded2->addParticle(vector<double>());
nonbonded2->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
nonbonded2->setCutoffDistance(2.9);
nonbonded2->setForceGroup(1);
system.addForce(nonbonded2);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 0, 0);
for (double r = 2.4; r < 3.2; r += 0.2) {
positions[1][1] = r;
context.setPositions(positions);
double e1 = (r < 2.5 ? 2.0*r : 0.0);
double e2 = (r < 2.9 ? 3.0*r : 0.0);
double f1 = (r < 2.5 ? 2.0 : 0.0);
double f2 = (r < 2.9 ? 3.0 : 0.0);
// Check the first force.
State state = context.getState(State::Forces | State::Energy, false, 1);
ASSERT_EQUAL_VEC(Vec3(0, f1, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f1, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e1, state.getPotentialEnergy(), TOL);
// Check the second force.
state = context.getState(State::Forces | State::Energy, false, 2);
ASSERT_EQUAL_VEC(Vec3(0, f2, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f2, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e2, state.getPotentialEnergy(), TOL);
// Check the sum of both forces.
state = context.getState(State::Forces | State::Energy);
ASSERT_EQUAL_VEC(Vec3(0, f1+f2, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f1-f2, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e1+e2, state.getPotentialEnergy(), TOL);
}
}
int main() {
try {
if (!CpuPlatform::isProcessorSupported()) {
......@@ -924,6 +980,7 @@ int main() {
testInteractionGroups();
testLargeInteractionGroup();
testInteractionGroupLongRangeCorrection();
testMultipleCutoffs();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -752,7 +752,7 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
double prefactor, surfaceAreaFactor;
double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels;
int maxTiles;
CudaContext& cu;
......@@ -802,6 +802,7 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
double cutoff;
bool hasInitializedKernels, needParameterGradient;
int maxTiles, numComputedValues;
CudaContext& cu;
......
......@@ -129,31 +129,17 @@ public:
return forceThreadBlockSize;
}
/**
* Get the cutoff distance.
* Get the maximum cutoff distance used by any force group.
*/
double getCutoffDistance() {
return cutoff;
}
/**
* Get whether any interactions have been added.
*/
bool getHasInteractions() {
return cutoff != -1.0;
}
/**
* Get the force group in which nonbonded interactions should be computed.
*/
int getForceGroup() {
return nonbondedForceGroup;
}
double getMaxCutoffDistance();
/**
* Prepare to compute interactions. This updates the neighbor list.
*/
void prepareInteractions();
void prepareInteractions(int forceGroups);
/**
* Compute the nonbonded interactions.
*/
void computeInteractions();
void computeInteractions(int forceGroups);
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*/
......@@ -246,16 +232,20 @@ public:
* @param arguments arrays (other than per-atom parameters) that should be passed as arguments to the kernel
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
*/
CUfunction createInteractionKernel(const std::string& source, std::vector<ParameterInfo>& params, std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric);
CUfunction createInteractionKernel(const std::string& source, std::vector<ParameterInfo>& params, std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
* @param groups the set of force groups
*/
void createKernelsForGroups(int groups);
private:
class KernelSet;
class BlockSortTrait;
CudaContext& context;
CUfunction forceKernel;
CUfunction findBlockBoundsKernel;
CUfunction sortBoxDataKernel;
CUfunction findInteractingBlocksKernel;
CUfunction findInteractionsWithinBlocksKernel;
std::map<int, KernelSet> groupKernels;
CudaArray* exclusionTiles;
CudaArray* exclusions;
CudaArray* exclusionIndices;
......@@ -275,11 +265,26 @@ private:
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments;
std::string kernelSource;
std::map<std::string, std::string> kernelDefines;
double cutoff;
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, anyExclusions, usePadding, forceRebuildNeighborList;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, numForceThreadBlocks, forceThreadBlockSize, nonbondedForceGroup, numAtoms;
int startTileIndex, numTiles, startBlockIndex, numBlocks, maxTiles, maxExclusions, numForceThreadBlocks, forceThreadBlockSize, numAtoms, groupFlags;
};
/**
* This class stores the kernels to execute for a set of force groups.
*/
class CudaNonbondedUtilities::KernelSet {
public:
bool hasForces;
double cutoffDistance;
CUfunction forceKernel;
CUfunction findBlockBoundsKernel;
CUfunction sortBoxDataKernel;
CUfunction findInteractingBlocksKernel;
CUfunction findInteractionsWithinBlocksKernel;
};
/**
......
......@@ -49,6 +49,7 @@
#include <fstream>
#include <iomanip>
#include <iostream>
#include <set>
#include <sstream>
#include <typeinfo>
#include <cudaProfiler.h>
......@@ -405,13 +406,32 @@ void CudaContext::setAsCurrent() {
}
string CudaContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const {
static set<char> symbolChars;
if (symbolChars.size() == 0) {
symbolChars.insert('_');
for (char c = 'a'; c <= 'z'; c++)
symbolChars.insert(c);
for (char c = 'A'; c <= 'Z'; c++)
symbolChars.insert(c);
for (char c = '0'; c <= '9'; c++)
symbolChars.insert(c);
}
string result = input;
for (map<string, string>::const_iterator iter = replacements.begin(); iter != replacements.end(); iter++) {
int index = -1;
int index = 0;
int size = iter->first.size();
do {
index = result.find(iter->first);
if (index != result.npos)
result.replace(index, iter->first.size(), iter->second);
index = result.find(iter->first, index);
if (index != result.npos) {
if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) && (index == result.size()-size || symbolChars.find(result[index+size]) == symbolChars.end())) {
// We have found a complete symbol, not part of a longer symbol.
result.replace(index, size, iter->second);
index += iter->second.size();
}
else
index++;
}
} while (index != result.npos);
}
return result;
......@@ -1216,7 +1236,7 @@ void CudaContext::reorderAtomsImpl() {
if (useHilbert)
binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
else
binWidth = (Real) (0.2*nonbonded->getCutoffDistance());
binWidth = (Real) (0.2*nonbonded->getMaxCutoffDistance());
Real invBinWidth = (Real) (1.0/binWidth);
int xbins = 1 + (int) ((maxx-minx)*invBinWidth);
int ybins = 1 + (int) ((maxy-miny)*invBinWidth);
......
......@@ -98,16 +98,13 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
for (vector<CudaContext::ForcePreComputation*>::iterator iter = cu.getPreComputations().begin(); iter != cu.getPreComputations().end(); ++iter)
(*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cu.setComputeForceCount(cu.getComputeForceCount()+1);
if (includeNonbonded)
nb.prepareInteractions();
nb.prepareInteractions(groups);
}
double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cu.getBondedUtilities().computeInteractions(groups);
if ((groups&(1<<cu.getNonbondedUtilities().getForceGroup())) != 0)
cu.getNonbondedUtilities().computeInteractions();
cu.getNonbondedUtilities().computeInteractions(groups);
double sum = 0.0;
for (vector<CudaContext::ForcePostComputation*>::iterator iter = cu.getPostComputations().begin(); iter != cu.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
......@@ -2570,8 +2567,9 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
cutoff = force.getCutoffDistance();
string source = CudaKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup());
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));;
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));;
cu.addForce(new CudaGBSAOBCForceInfo(force));
......@@ -2591,8 +2589,8 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
defines["USE_PERIODIC"] = "1";
if (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision())
defines["ENABLE_SHUFFLE"] = "1";
defines["CUTOFF_SQUARED"] = cu.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["CUTOFF"] = cu.doubleToString(nb.getCutoffDistance());
defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = cu.doubleToString(cutoff);
defines["PREFACTOR"] = cu.doubleToString(prefactor);
defines["SURFACE_AREA_FACTOR"] = cu.doubleToString(surfaceAreaFactor);
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
......@@ -2764,6 +2762,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
cu.setAsCurrent();
if (cu.getPlatformData().contexts.size() > 1)
throw OpenMMException("CustomGBForce does not support using multiple CUDA devices");
cutoff = force.getCutoffDistance();
bool useExclusionsForValue = false;
numComputedValues = force.getNumComputedValues();
vector<string> computedValueNames(force.getNumComputedValues());
......@@ -2956,7 +2955,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
pairValueDefines["NEED_PADDING"] = "1";
pairValueDefines["WARPS_PER_GROUP"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()/CudaContext::TileSize);
pairValueDefines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
pairValueDefines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
pairValueDefines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
pairValueDefines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
pairValueDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
pairValueDefines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
......@@ -3123,7 +3122,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
pairEnergyDefines["NEED_PADDING"] = "1";
pairEnergyDefines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
pairEnergyDefines["WARPS_PER_GROUP"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()/CudaContext::TileSize);
pairEnergyDefines["CUTOFF_SQUARED"] = cu.doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
pairEnergyDefines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
pairEnergyDefines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
pairEnergyDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
pairEnergyDefines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
......@@ -3367,7 +3366,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
globals->upload(globalParamValues);
arguments.push_back(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
}
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) parameters.size(); i++)
cu.getNonbondedUtilities().addParameter(parameters[i]);
for (int i = 0; i < (int) arguments.size(); i++)
......@@ -3393,7 +3392,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
pairValueDefines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
pairValueDefines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
pairValueDefines["CUTOFF"] = cu.doubleToString(nb.getCutoffDistance());
pairValueDefines["CUTOFF"] = cu.doubleToString(cutoff);
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+pairValueSrc, pairValueDefines);
pairValueKernel = cu.getKernel(module, "computeN2Value");
pairValueSrc = "";
......@@ -3407,7 +3406,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
pairEnergyDefines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
pairEnergyDefines["CUTOFF"] = cu.doubleToString(nb.getCutoffDistance());
pairEnergyDefines["CUTOFF"] = cu.doubleToString(cutoff);
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+pairEnergySrc, pairEnergyDefines);
pairEnergyKernel = cu.getKernel(module, "computeN2Energy");
pairEnergySrc = "";
......
......@@ -62,10 +62,10 @@ private:
bool useDouble;
};
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), cutoff(-1.0), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
CudaNonbondedUtilities::CudaNonbondedUtilities(CudaContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), nonbondedForceGroup(0), forceRebuildNeighborList(true) {
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
// Decide how many thread blocks to use.
string errorMessage = "Error initializing nonbonded utilities";
......@@ -109,24 +109,28 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
}
void CudaNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
if (cutoff != -1.0) {
if (groupCutoff.size() > 0) {
if (usesCutoff != useCutoff)
throw OpenMMException("All Forces must agree on whether to use a cutoff");
if (usesPeriodic != usePeriodic)
throw OpenMMException("All Forces must agree on whether to use periodic boundary conditions");
if (cutoffDistance != cutoff)
throw OpenMMException("All Forces must use the same cutoff distance");
if (forceGroup != nonbondedForceGroup)
throw OpenMMException("All nonbonded forces must be in the same force group");
if (usesCutoff && groupCutoff.find(forceGroup) != groupCutoff.end() && groupCutoff[forceGroup] != cutoffDistance)
throw OpenMMException("All Forces in a single force group must use the same cutoff distance");
}
if (usesExclusions)
requestExclusions(exclusionList);
useCutoff = usesCutoff;
usePeriodic = usesPeriodic;
cutoff = cutoffDistance;
if (kernel.size() > 0)
kernelSource += kernel+"\n";
nonbondedForceGroup = forceGroup;
groupCutoff[forceGroup] = cutoffDistance;
groupFlags |= 1<<forceGroup;
if (kernel.size() > 0) {
if (groupKernelSource.find(forceGroup) == groupKernelSource.end())
groupKernelSource[forceGroup] = "";
map<string, string> replacements;
replacements["CUTOFF"] = "CUTOFF_"+context.intToString(forceGroup);
replacements["CUTOFF_SQUARED"] = "CUTOFF_"+context.intToString(forceGroup)+"_SQUARED";
groupKernelSource[forceGroup] += context.replaceStrings(kernel, replacements)+"\n";
}
}
void CudaNonbondedUtilities::addParameter(const ParameterInfo& parameter) {
......@@ -213,6 +217,9 @@ void CudaNonbondedUtilities::initialize(const System& system) {
exclusionIndicesVec.insert(exclusionIndicesVec.end(), exclusionBlocksForBlock[i].begin(), exclusionBlocksForBlock[i].end());
exclusionRowIndicesVec[i+1] = exclusionIndicesVec.size();
}
maxExclusions = 0;
for (int i = 0; i < (int) exclusionBlocksForBlock.size(); i++)
maxExclusions = (maxExclusions > exclusionBlocksForBlock[i].size() ? maxExclusions : exclusionBlocksForBlock[i].size());
exclusionIndices = CudaArray::create<unsigned int>(context, exclusionIndicesVec.size(), "exclusionIndices");
exclusionRowIndices = CudaArray::create<unsigned int>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices");
exclusionIndices->upload(exclusionIndicesVec);
......@@ -270,29 +277,33 @@ void CudaNonbondedUtilities::initialize(const System& system) {
interactionCount->upload(count);
}
// Create kernels.
// Record arguments for kernels.
if (kernelSource.size() > 0)
forceKernel = createInteractionKernel(kernelSource, parameters, arguments, true, true);
forceArgs.push_back(&context.getForce().getDevicePointer());
forceArgs.push_back(&context.getEnergyBuffer().getDevicePointer());
forceArgs.push_back(&context.getPosq().getDevicePointer());
forceArgs.push_back(&exclusions->getDevicePointer());
forceArgs.push_back(&exclusionTiles->getDevicePointer());
forceArgs.push_back(&startTileIndex);
forceArgs.push_back(&numTiles);
if (useCutoff) {
forceArgs.push_back(&interactingTiles->getDevicePointer());
forceArgs.push_back(&interactionCount->getDevicePointer());
forceArgs.push_back(context.getPeriodicBoxSizePointer());
forceArgs.push_back(context.getInvPeriodicBoxSizePointer());
forceArgs.push_back(context.getPeriodicBoxVecXPointer());
forceArgs.push_back(context.getPeriodicBoxVecYPointer());
forceArgs.push_back(context.getPeriodicBoxVecZPointer());
forceArgs.push_back(&maxTiles);
forceArgs.push_back(&blockCenter->getDevicePointer());
forceArgs.push_back(&blockBoundingBox->getDevicePointer());
forceArgs.push_back(&interactingAtoms->getDevicePointer());
}
for (int i = 0; i < (int) parameters.size(); i++)
forceArgs.push_back(&parameters[i].getMemory());
for (int i = 0; i < (int) arguments.size(); i++)
forceArgs.push_back(&arguments[i].getMemory());
if (useCutoff) {
double padding = (usePadding ? 0.1*cutoff : 0.0);
double paddedCutoff = cutoff+padding;
map<string, string> defines;
defines["TILE_SIZE"] = context.intToString(CudaContext::TileSize);
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDING"] = context.doubleToString(padding);
defines["PADDED_CUTOFF"] = context.doubleToString(paddedCutoff);
defines["PADDED_CUTOFF_SQUARED"] = context.doubleToString(paddedCutoff*paddedCutoff);
defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(exclusionTiles->getSize());
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
int maxExclusions = 0;
for (int i = 0; i < (int) exclusionBlocksForBlock.size(); i++)
maxExclusions = (maxExclusions > exclusionBlocksForBlock[i].size() ? maxExclusions : exclusionBlocksForBlock[i].size());
defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions);
CUmodule interactingBlocksProgram = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::findInteractingBlocks, defines);
findBlockBoundsKernel = context.getKernel(interactingBlocksProgram, "findBlockBounds");
findBlockBoundsArgs.push_back(&numAtoms);
findBlockBoundsArgs.push_back(context.getPeriodicBoxSizePointer());
findBlockBoundsArgs.push_back(context.getInvPeriodicBoxSizePointer());
......@@ -304,7 +315,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
findBlockBoundsArgs.push_back(&blockBoundingBox->getDevicePointer());
findBlockBoundsArgs.push_back(&rebuildNeighborList->getDevicePointer());
findBlockBoundsArgs.push_back(&sortedBlocks->getDevicePointer());
sortBoxDataKernel = context.getKernel(interactingBlocksProgram, "sortBoxData");
sortBoxDataArgs.push_back(&sortedBlocks->getDevicePointer());
sortBoxDataArgs.push_back(&blockCenter->getDevicePointer());
sortBoxDataArgs.push_back(&blockBoundingBox->getDevicePointer());
......@@ -315,7 +325,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortBoxDataArgs.push_back(&interactionCount->getDevicePointer());
sortBoxDataArgs.push_back(&rebuildNeighborList->getDevicePointer());
sortBoxDataArgs.push_back(&forceRebuildNeighborList);
findInteractingBlocksKernel = context.getKernel(interactingBlocksProgram, "findBlocksWithInteractions");
findInteractingBlocksArgs.push_back(context.getPeriodicBoxSizePointer());
findInteractingBlocksArgs.push_back(context.getInvPeriodicBoxSizePointer());
findInteractingBlocksArgs.push_back(context.getPeriodicBoxVecXPointer());
......@@ -338,30 +347,48 @@ void CudaNonbondedUtilities::initialize(const System& system) {
}
}
void CudaNonbondedUtilities::prepareInteractions() {
double CudaNonbondedUtilities::getMaxCutoffDistance() {
double cutoff = 0.0;
for (map<int, double>::const_iterator iter = groupCutoff.begin(); iter != groupCutoff.end(); ++iter)
cutoff = max(cutoff, iter->second);
return cutoff;
}
void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if ((forceGroups&groupFlags) == 0)
return;
if (groupKernels.find(forceGroups) == groupKernels.end())
createKernelsForGroups(forceGroups);
if (!useCutoff)
return;
if (numTiles == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (usePeriodic) {
double4 box = context.getPeriodicBoxSize();
double minAllowedSize = 1.999999*cutoff;
double minAllowedSize = 1.999999*kernels.cutoffDistance;
if (box.x < minAllowedSize || box.y < minAllowedSize || box.z < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
}
// Compute the neighbor list.
context.executeKernel(findBlockBoundsKernel, &findBlockBoundsArgs[0], context.getNumAtoms());
if (lastCutoff != kernels.cutoffDistance)
forceRebuildNeighborList = true;
context.executeKernel(kernels.findBlockBoundsKernel, &findBlockBoundsArgs[0], context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
context.executeKernel(sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms());
context.executeKernel(findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256);
context.executeKernel(kernels.sortBoxDataKernel, &sortBoxDataArgs[0], context.getNumAtoms());
context.executeKernel(kernels.findInteractingBlocksKernel, &findInteractingBlocksArgs[0], context.getNumAtoms(), 256);
forceRebuildNeighborList = false;
lastCutoff = kernels.cutoffDistance;
}
void CudaNonbondedUtilities::computeInteractions() {
if (kernelSource.size() > 0) {
context.executeKernel(forceKernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void CudaNonbondedUtilities::computeInteractions(int forceGroups) {
if ((forceGroups&groupFlags) == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (kernels.hasForces) {
context.executeKernel(kernels.forceKernel, &forceArgs[0], numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
if (context.getComputeForceCount() == 1)
updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
}
......@@ -411,7 +438,43 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
forceRebuildNeighborList = true;
}
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) {
void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
KernelSet kernels;
double cutoff = 0.0;
string source;
for (int i = 0; i < 32; i++) {
if ((groups&(1<<i)) != 0) {
cutoff = max(cutoff, groupCutoff[i]);
source += groupKernelSource[i];
}
}
kernels.hasForces = (source.size() > 0);
kernels.cutoffDistance = cutoff;
if (kernels.hasForces)
kernels.forceKernel = createInteractionKernel(source, parameters, arguments, true, true, groups);
if (useCutoff && kernels.hasForces) {
double padding = (usePadding ? 0.1*cutoff : 0.0);
double paddedCutoff = cutoff+padding;
map<string, string> defines;
defines["TILE_SIZE"] = context.intToString(CudaContext::TileSize);
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDING"] = context.doubleToString(padding);
defines["PADDED_CUTOFF"] = context.doubleToString(paddedCutoff);
defines["PADDED_CUTOFF_SQUARED"] = context.doubleToString(paddedCutoff*paddedCutoff);
defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(exclusionTiles->getSize());
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions);
CUmodule interactingBlocksProgram = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::findInteractingBlocks, defines);
kernels.findBlockBoundsKernel = context.getKernel(interactingBlocksProgram, "findBlockBounds");
kernels.sortBoxDataKernel = context.getKernel(interactingBlocksProgram, "sortBoxData");
kernels.findInteractingBlocksKernel = context.getKernel(interactingBlocksProgram, "findBlocksWithInteractions");
}
groupKernels[groups] = kernels;
}
CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, vector<ParameterInfo>& params, vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups) {
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source;
const string suffixes[] = {"x", "y", "z", "w"};
......@@ -588,8 +651,16 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
if (useShuffle)
defines["ENABLE_SHUFFLE"] = "1";
defines["THREAD_BLOCK_SIZE"] = context.intToString(forceThreadBlockSize);
defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = context.doubleToString(cutoff);
double maxCutoff = 0.0;
for (int i = 0; i < 32; i++) {
if ((groups&(1<<i)) != 0) {
double cutoff = groupCutoff[i];
maxCutoff = max(maxCutoff, cutoff);
defines["CUTOFF_"+context.intToString(i)+"_SQUARED"] = context.doubleToString(cutoff*cutoff);
defines["CUTOFF_"+context.intToString(i)] = context.doubleToString(cutoff);
}
}
defines["MAX_CUTOFF"] = context.doubleToString(maxCutoff);
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
......@@ -605,33 +676,5 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines["PARAMETER_SIZE_IS_EVEN"] = "1";
CUmodule program = context.createModule(CudaKernelSources::vectorOps+context.replaceStrings(CudaKernelSources::nonbonded, replacements), defines);
CUfunction kernel = context.getKernel(program, "computeNonbonded");
// Set arguments to the Kernel.
int index = 0;
forceArgs.push_back(&context.getForce().getDevicePointer());
forceArgs.push_back(&context.getEnergyBuffer().getDevicePointer());
forceArgs.push_back(&context.getPosq().getDevicePointer());
forceArgs.push_back(&exclusions->getDevicePointer());
forceArgs.push_back(&exclusionTiles->getDevicePointer());
forceArgs.push_back(&startTileIndex);
forceArgs.push_back(&numTiles);
if (useCutoff) {
forceArgs.push_back(&interactingTiles->getDevicePointer());
forceArgs.push_back(&interactionCount->getDevicePointer());
forceArgs.push_back(context.getPeriodicBoxSizePointer());
forceArgs.push_back(context.getInvPeriodicBoxSizePointer());
forceArgs.push_back(context.getPeriodicBoxVecXPointer());
forceArgs.push_back(context.getPeriodicBoxVecYPointer());
forceArgs.push_back(context.getPeriodicBoxVecZPointer());
forceArgs.push_back(&maxTiles);
forceArgs.push_back(&blockCenter->getDevicePointer());
forceArgs.push_back(&blockBoundingBox->getDevicePointer());
forceArgs.push_back(&interactingAtoms->getDevicePointer());
}
for (int i = 0; i < (int) params.size(); i++)
forceArgs.push_back(&params[i].getMemory());
for (int i = 0; i < (int) arguments.size(); i++)
forceArgs.push_back(&arguments[i].getMemory());
return kernel;
}
......@@ -330,9 +330,9 @@ extern "C" __global__ void computeNonbonded(
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
}
else
#endif
......
......@@ -974,6 +974,62 @@ void testInteractionGroupLongRangeCorrection() {
ASSERT_EQUAL_TOL(expected, energy2-energy1, 1e-4);
}
void testMultipleCutoffs() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
// Add multiple nonbonded forces that have different cutoffs.
CustomNonbondedForce* nonbonded1 = new CustomNonbondedForce("2*r");
nonbonded1->addParticle(vector<double>());
nonbonded1->addParticle(vector<double>());
nonbonded1->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
nonbonded1->setCutoffDistance(2.5);
system.addForce(nonbonded1);
CustomNonbondedForce* nonbonded2 = new CustomNonbondedForce("3*r");
nonbonded2->addParticle(vector<double>());
nonbonded2->addParticle(vector<double>());
nonbonded2->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
nonbonded2->setCutoffDistance(2.9);
nonbonded2->setForceGroup(1);
system.addForce(nonbonded2);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 0, 0);
for (double r = 2.4; r < 3.2; r += 0.2) {
positions[1][1] = r;
context.setPositions(positions);
double e1 = (r < 2.5 ? 2.0*r : 0.0);
double e2 = (r < 2.9 ? 3.0*r : 0.0);
double f1 = (r < 2.5 ? 2.0 : 0.0);
double f2 = (r < 2.9 ? 3.0 : 0.0);
// Check the first force.
State state = context.getState(State::Forces | State::Energy, false, 1);
ASSERT_EQUAL_VEC(Vec3(0, f1, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f1, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e1, state.getPotentialEnergy(), TOL);
// Check the second force.
state = context.getState(State::Forces | State::Energy, false, 2);
ASSERT_EQUAL_VEC(Vec3(0, f2, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f2, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e2, state.getPotentialEnergy(), TOL);
// Check the sum of both forces.
state = context.getState(State::Forces | State::Energy);
ASSERT_EQUAL_VEC(Vec3(0, f1+f2, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f1-f2, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e1+e2, state.getPotentialEnergy(), TOL);
}
}
int main(int argc, char* argv[]) {
try {
if (argc > 1)
......@@ -998,6 +1054,7 @@ int main(int argc, char* argv[]) {
testInteractionGroups();
testLargeInteractionGroup();
testInteractionGroupLongRangeCorrection();
testMultipleCutoffs();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
......
......@@ -732,7 +732,7 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
double prefactor, surfaceAreaFactor;
double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels;
int maxTiles;
OpenCLContext& cl;
......@@ -783,6 +783,7 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
double cutoff;
bool hasInitializedKernels, needParameterGradient;
int maxTiles, numComputedValues;
OpenCLContext& cl;
......
......@@ -135,31 +135,23 @@ public:
return forceThreadBlockSize;
}
/**
* Get the cutoff distance.
* Get the maximum cutoff distance used by any force group.
*/
double getCutoffDistance() {
return cutoff;
}
double getMaxCutoffDistance();
/**
* Get whether any interactions have been added.
*/
bool getHasInteractions() {
return cutoff != -1.0;
}
/**
* Get the force group in which nonbonded interactions should be computed.
*/
int getForceGroup() {
return nonbondedForceGroup;
return (groupCutoff.size() > 0);
}
/**
* Prepare to compute interactions. This updates the neighbor list.
*/
void prepareInteractions();
void prepareInteractions(int forceGroups);
/**
* Compute the nonbonded interactions.
*/
void computeInteractions();
void computeInteractions(int forceGroups);
/**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*/
......@@ -252,16 +244,20 @@ public:
* @param arguments arrays (other than per-atom parameters) that should be passed as arguments to the kernel
* @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
*/
cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo>& params, const std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
* @param groups the set of force groups
*/
cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo>& params, const std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) const;
void createKernelsForGroups(int groups);
private:
class KernelSet;
class BlockSortTrait;
OpenCLContext& context;
cl::Kernel forceKernel;
cl::Kernel findBlockBoundsKernel;
cl::Kernel sortBoxDataKernel;
cl::Kernel findInteractingBlocksKernel;
cl::Kernel findInteractionsWithinBlocksKernel;
std::map<int, KernelSet> groupKernels;
OpenCLArray* exclusionTiles;
OpenCLArray* exclusions;
OpenCLArray* exclusionIndices;
......@@ -280,12 +276,27 @@ private:
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments;
std::string kernelSource;
std::map<std::string, std::string> kernelDefines;
double cutoff;
bool useCutoff, usePeriodic, deviceIsCpu, anyExclusions, usePadding;
int numForceBuffers, startTileIndex, numTiles, startBlockIndex, numBlocks, numForceThreadBlocks;
int forceThreadBlockSize, interactingBlocksThreadBlockSize, nonbondedForceGroup;
std::map<int, double> groupCutoff;
std::map<int, std::string> groupKernelSource;
double lastCutoff;
bool useCutoff, usePeriodic, deviceIsCpu, anyExclusions, usePadding, forceRebuildNeighborList;
int numForceBuffers, startTileIndex, numTiles, startBlockIndex, numBlocks, maxExclusions, numForceThreadBlocks;
int forceThreadBlockSize, interactingBlocksThreadBlockSize, groupFlags;
};
/**
* This class stores the kernels to execute for a set of force groups.
*/
class OpenCLNonbondedUtilities::KernelSet {
public:
bool hasForces;
double cutoffDistance;
cl::Kernel forceKernel;
cl::Kernel findBlockBoundsKernel;
cl::Kernel sortBoxDataKernel;
cl::Kernel findInteractingBlocksKernel;
cl::Kernel findInteractionsWithinBlocksKernel;
};
/**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment