Commit f065df01 authored by Peter Eastman's avatar Peter Eastman
Browse files

OpenCL allows different force groups to have different cutoffs

parent ab8d97b3
...@@ -732,7 +732,7 @@ public: ...@@ -732,7 +732,7 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force); void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private: private:
double prefactor, surfaceAreaFactor; double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels; bool hasCreatedKernels;
int maxTiles; int maxTiles;
OpenCLContext& cl; OpenCLContext& cl;
...@@ -783,6 +783,7 @@ public: ...@@ -783,6 +783,7 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force); void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private: private:
double cutoff;
bool hasInitializedKernels, needParameterGradient; bool hasInitializedKernels, needParameterGradient;
int maxTiles, numComputedValues; int maxTiles, numComputedValues;
OpenCLContext& cl; OpenCLContext& cl;
......
...@@ -135,31 +135,23 @@ public: ...@@ -135,31 +135,23 @@ public:
return forceThreadBlockSize; return forceThreadBlockSize;
} }
/** /**
* Get the cutoff distance. * Get the maximum cutoff distance used by any force group.
*/ */
double getCutoffDistance() { double getMaxCutoffDistance();
return cutoff;
}
/** /**
* Get whether any interactions have been added. * Get whether any interactions have been added.
*/ */
bool getHasInteractions() { bool getHasInteractions() {
return cutoff != -1.0; return (groupCutoff.size() > 0);
}
/**
* Get the force group in which nonbonded interactions should be computed.
*/
int getForceGroup() {
return nonbondedForceGroup;
} }
/** /**
* Prepare to compute interactions. This updates the neighbor list. * Prepare to compute interactions. This updates the neighbor list.
*/ */
void prepareInteractions(); void prepareInteractions(int forceGroups);
/** /**
* Compute the nonbonded interactions. * Compute the nonbonded interactions.
*/ */
void computeInteractions(); void computeInteractions(int forceGroups);
/** /**
* Check to see if the neighbor list arrays are large enough, and make them bigger if necessary. * Check to see if the neighbor list arrays are large enough, and make them bigger if necessary.
*/ */
...@@ -252,16 +244,20 @@ public: ...@@ -252,16 +244,20 @@ public:
* @param arguments arrays (other than per-atom parameters) that should be passed as arguments to the kernel * @param arguments arrays (other than per-atom parameters) that should be passed as arguments to the kernel
* @param useExclusions specifies whether exclusions are applied to this interaction * @param useExclusions specifies whether exclusions are applied to this interaction
* @param isSymmetric specifies whether the interaction is symmetric * @param isSymmetric specifies whether the interaction is symmetric
* @param groups the set of force groups this kernel is for
*/
cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo>& params, const std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups);
/**
* Create the set of kernels that will be needed for a particular combination of force groups.
*
* @param groups the set of force groups
*/ */
cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo>& params, const std::vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) const; void createKernelsForGroups(int groups);
private: private:
class KernelSet;
class BlockSortTrait; class BlockSortTrait;
OpenCLContext& context; OpenCLContext& context;
cl::Kernel forceKernel; std::map<int, KernelSet> groupKernels;
cl::Kernel findBlockBoundsKernel;
cl::Kernel sortBoxDataKernel;
cl::Kernel findInteractingBlocksKernel;
cl::Kernel findInteractionsWithinBlocksKernel;
OpenCLArray* exclusionTiles; OpenCLArray* exclusionTiles;
OpenCLArray* exclusions; OpenCLArray* exclusions;
OpenCLArray* exclusionIndices; OpenCLArray* exclusionIndices;
...@@ -280,12 +276,27 @@ private: ...@@ -280,12 +276,27 @@ private:
std::vector<std::vector<int> > atomExclusions; std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters; std::vector<ParameterInfo> parameters;
std::vector<ParameterInfo> arguments; std::vector<ParameterInfo> arguments;
std::string kernelSource; std::map<int, double> groupCutoff;
std::map<std::string, std::string> kernelDefines; std::map<int, std::string> groupKernelSource;
double cutoff; double lastCutoff;
bool useCutoff, usePeriodic, deviceIsCpu, anyExclusions, usePadding; bool useCutoff, usePeriodic, deviceIsCpu, anyExclusions, usePadding, forceRebuildNeighborList;
int numForceBuffers, startTileIndex, numTiles, startBlockIndex, numBlocks, numForceThreadBlocks; int numForceBuffers, startTileIndex, numTiles, startBlockIndex, numBlocks, maxExclusions, numForceThreadBlocks;
int forceThreadBlockSize, interactingBlocksThreadBlockSize, nonbondedForceGroup; int forceThreadBlockSize, interactingBlocksThreadBlockSize, groupFlags;
};
/**
* This class stores the kernels to execute for a set of force groups.
*/
class OpenCLNonbondedUtilities::KernelSet {
public:
bool hasForces;
double cutoffDistance;
cl::Kernel forceKernel;
cl::Kernel findBlockBoundsKernel;
cl::Kernel sortBoxDataKernel;
cl::Kernel findInteractingBlocksKernel;
cl::Kernel findInteractionsWithinBlocksKernel;
}; };
/** /**
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <set>
#include <sstream> #include <sstream>
#include <typeinfo> #include <typeinfo>
...@@ -492,13 +493,32 @@ void OpenCLContext::addForce(OpenCLForceInfo* force) { ...@@ -492,13 +493,32 @@ void OpenCLContext::addForce(OpenCLForceInfo* force) {
} }
string OpenCLContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const { string OpenCLContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const {
static set<char> symbolChars;
if (symbolChars.size() == 0) {
symbolChars.insert('_');
for (char c = 'a'; c <= 'z'; c++)
symbolChars.insert(c);
for (char c = 'A'; c <= 'Z'; c++)
symbolChars.insert(c);
for (char c = '0'; c <= '9'; c++)
symbolChars.insert(c);
}
string result = input; string result = input;
for (map<string, string>::const_iterator iter = replacements.begin(); iter != replacements.end(); iter++) { for (map<string, string>::const_iterator iter = replacements.begin(); iter != replacements.end(); iter++) {
int index = -1; int index = 0;
int size = iter->first.size();
do { do {
index = result.find(iter->first); index = result.find(iter->first, index);
if (index != result.npos) if (index != result.npos) {
result.replace(index, iter->first.size(), iter->second); if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) && (index == result.size()-size || symbolChars.find(result[index+size]) == symbolChars.end())) {
// We have found a complete symbol, not part of a longer symbol.
result.replace(index, size, iter->second);
index += iter->second.size();
}
else
index++;
}
} while (index != result.npos); } while (index != result.npos);
} }
return result; return result;
...@@ -1130,7 +1150,7 @@ void OpenCLContext::reorderAtomsImpl() { ...@@ -1130,7 +1150,7 @@ void OpenCLContext::reorderAtomsImpl() {
if (useHilbert) if (useHilbert)
binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0); binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
else else
binWidth = (Real) (0.2*nonbonded->getCutoffDistance()); binWidth = (Real) (0.2*nonbonded->getMaxCutoffDistance());
Real invBinWidth = (Real) (1.0/binWidth); Real invBinWidth = (Real) (1.0/binWidth);
int xbins = 1 + (int) ((maxx-minx)*invBinWidth); int xbins = 1 + (int) ((maxx-minx)*invBinWidth);
int ybins = 1 + (int) ((maxy-miny)*invBinWidth); int ybins = 1 + (int) ((maxy-miny)*invBinWidth);
......
...@@ -121,16 +121,13 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo ...@@ -121,16 +121,13 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
for (vector<OpenCLContext::ForcePreComputation*>::iterator iter = cl.getPreComputations().begin(); iter != cl.getPreComputations().end(); ++iter) for (vector<OpenCLContext::ForcePreComputation*>::iterator iter = cl.getPreComputations().begin(); iter != cl.getPreComputations().end(); ++iter)
(*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups); (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities(); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cl.setComputeForceCount(cl.getComputeForceCount()+1); cl.setComputeForceCount(cl.getComputeForceCount()+1);
if (includeNonbonded) nb.prepareInteractions(groups);
nb.prepareInteractions();
} }
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) { double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cl.getBondedUtilities().computeInteractions(groups); cl.getBondedUtilities().computeInteractions(groups);
if ((groups&(1<<cl.getNonbondedUtilities().getForceGroup())) != 0) cl.getNonbondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions();
double sum = 0.0; double sum = 0.0;
for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter) for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups); sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
...@@ -2643,8 +2640,9 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB ...@@ -2643,8 +2640,9 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy(); surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic); bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
cutoff = force.getCutoffDistance();
string source = OpenCLKernelSources::gbsaObc2; string source = OpenCLKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup()); nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDeviceBuffer()));; nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce->getDeviceBuffer()));; nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce->getDeviceBuffer()));;
cl.addForce(new OpenCLGBSAOBCForceInfo(nb.getNumForceBuffers(), force)); cl.addForce(new OpenCLGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
...@@ -2663,8 +2661,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2663,8 +2661,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
if (nb.getUsePeriodic()) if (nb.getUsePeriodic())
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
defines["CUTOFF_SQUARED"] = cl.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance()); defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance()); defines["CUTOFF"] = cl.doubleToString(cutoff);
defines["PREFACTOR"] = cl.doubleToString(prefactor); defines["PREFACTOR"] = cl.doubleToString(prefactor);
defines["SURFACE_AREA_FACTOR"] = cl.doubleToString(surfaceAreaFactor); defines["SURFACE_AREA_FACTOR"] = cl.doubleToString(surfaceAreaFactor);
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
...@@ -2856,6 +2854,7 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() { ...@@ -2856,6 +2854,7 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) { void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
if (cl.getPlatformData().contexts.size() > 1) if (cl.getPlatformData().contexts.size() > 1)
throw OpenMMException("CustomGBForce does not support using multiple OpenCL devices"); throw OpenMMException("CustomGBForce does not support using multiple OpenCL devices");
cutoff = force.getCutoffDistance();
bool useExclusionsForValue = false; bool useExclusionsForValue = false;
numComputedValues = force.getNumComputedValues(); numComputedValues = force.getNumComputedValues();
vector<string> computedValueNames(force.getNumComputedValues()); vector<string> computedValueNames(force.getNumComputedValues());
...@@ -3047,7 +3046,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3047,7 +3046,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (useExclusionsForValue) if (useExclusionsForValue)
pairValueDefines["USE_EXCLUSIONS"] = "1"; pairValueDefines["USE_EXCLUSIONS"] = "1";
pairValueDefines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()); pairValueDefines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize());
pairValueDefines["CUTOFF_SQUARED"] = cl.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); pairValueDefines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
pairValueDefines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); pairValueDefines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
pairValueDefines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms()); pairValueDefines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
pairValueDefines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks()); pairValueDefines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks());
...@@ -3240,7 +3239,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3240,7 +3239,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (anyExclusions) if (anyExclusions)
pairEnergyDefines["USE_EXCLUSIONS"] = "1"; pairEnergyDefines["USE_EXCLUSIONS"] = "1";
pairEnergyDefines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()); pairEnergyDefines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize());
pairEnergyDefines["CUTOFF_SQUARED"] = cl.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); pairEnergyDefines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
pairEnergyDefines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); pairEnergyDefines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
pairEnergyDefines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms()); pairEnergyDefines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
pairEnergyDefines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks()); pairEnergyDefines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks());
...@@ -3492,7 +3491,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3492,7 +3491,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
globals->upload(globalParamValues); globals->upload(globalParamValues);
arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer())); arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer()));
} }
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, force.getCutoffDistance(), exclusionList, source, force.getForceGroup()); cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) parameters.size(); i++) for (int i = 0; i < (int) parameters.size(); i++)
cl.getNonbondedUtilities().addParameter(parameters[i]); cl.getNonbondedUtilities().addParameter(parameters[i]);
for (int i = 0; i < (int) arguments.size(); i++) for (int i = 0; i < (int) arguments.size(); i++)
...@@ -3527,7 +3526,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3527,7 +3526,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts;
pairValueDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex); pairValueDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex);
pairValueDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex); pairValueDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex);
pairValueDefines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance()); pairValueDefines["CUTOFF"] = cl.doubleToString(cutoff);
cl::Program program = cl.createProgram(pairValueSrc, pairValueDefines); cl::Program program = cl.createProgram(pairValueSrc, pairValueDefines);
pairValueKernel = cl::Kernel(program, "computeN2Value"); pairValueKernel = cl::Kernel(program, "computeN2Value");
pairValueSrc = ""; pairValueSrc = "";
...@@ -3541,7 +3540,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3541,7 +3540,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts;
pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex); pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex);
pairEnergyDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex); pairEnergyDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex);
pairEnergyDefines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance()); pairEnergyDefines["CUTOFF"] = cl.doubleToString(cutoff);
cl::Program program = cl.createProgram(pairEnergySrc, pairEnergyDefines); cl::Program program = cl.createProgram(pairEnergySrc, pairEnergyDefines);
pairEnergyKernel = cl::Kernel(program, "computeN2Energy"); pairEnergyKernel = cl::Kernel(program, "computeN2Energy");
pairEnergySrc = ""; pairEnergySrc = "";
......
...@@ -54,10 +54,10 @@ private: ...@@ -54,10 +54,10 @@ private:
bool useDouble; bool useDouble;
}; };
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), cutoff(-1.0), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true), OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), useCutoff(false), usePeriodic(false), anyExclusions(false), usePadding(true),
numForceBuffers(0), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL), numForceBuffers(0), exclusionIndices(NULL), exclusionRowIndices(NULL), exclusionTiles(NULL), exclusions(NULL), interactingTiles(NULL), interactingAtoms(NULL),
interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL), interactionCount(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedBlocks(NULL), sortedBlockCenter(NULL), sortedBlockBoundingBox(NULL),
oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), nonbondedForceGroup(0) { oldPositions(NULL), rebuildNeighborList(NULL), blockSorter(NULL), forceRebuildNeighborList(true), lastCutoff(0.0), groupFlags(0) {
// Decide how many thread blocks and force buffers to use. // Decide how many thread blocks and force buffers to use.
deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); deviceIsCpu = (context.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
...@@ -126,24 +126,28 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() { ...@@ -126,24 +126,28 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
} }
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) { void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel, int forceGroup) {
if (cutoff != -1.0) { if (groupCutoff.size() > 0) {
if (usesCutoff != useCutoff) if (usesCutoff != useCutoff)
throw OpenMMException("All Forces must agree on whether to use a cutoff"); throw OpenMMException("All Forces must agree on whether to use a cutoff");
if (usesPeriodic != usePeriodic) if (usesPeriodic != usePeriodic)
throw OpenMMException("All Forces must agree on whether to use periodic boundary conditions"); throw OpenMMException("All Forces must agree on whether to use periodic boundary conditions");
if (cutoffDistance != cutoff) if (usesCutoff && groupCutoff.find(forceGroup) != groupCutoff.end() && groupCutoff[forceGroup] != cutoffDistance)
throw OpenMMException("All Forces must use the same cutoff distance"); throw OpenMMException("All Forces in a single force group must use the same cutoff distance");
if (forceGroup != nonbondedForceGroup)
throw OpenMMException("All nonbonded forces must be in the same force group");
} }
if (usesExclusions) if (usesExclusions)
requestExclusions(exclusionList); requestExclusions(exclusionList);
useCutoff = usesCutoff; useCutoff = usesCutoff;
usePeriodic = usesPeriodic; usePeriodic = usesPeriodic;
cutoff = cutoffDistance; groupCutoff[forceGroup] = cutoffDistance;
if (kernel.size() > 0) groupFlags |= 1<<forceGroup;
kernelSource += kernel+"\n"; if (kernel.size() > 0) {
nonbondedForceGroup = forceGroup; if (groupKernelSource.find(forceGroup) == groupKernelSource.end())
groupKernelSource[forceGroup] = "";
map<string, string> replacements;
replacements["CUTOFF"] = "CUTOFF_"+context.intToString(forceGroup);
replacements["CUTOFF_SQUARED"] = "CUTOFF_"+context.intToString(forceGroup)+"_SQUARED";
groupKernelSource[forceGroup] += context.replaceStrings(kernel, replacements)+"\n";
}
} }
void OpenCLNonbondedUtilities::addParameter(const ParameterInfo& parameter) { void OpenCLNonbondedUtilities::addParameter(const ParameterInfo& parameter) {
...@@ -228,6 +232,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -228,6 +232,9 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
exclusionIndicesVec.insert(exclusionIndicesVec.end(), exclusionBlocksForBlock[i].begin(), exclusionBlocksForBlock[i].end()); exclusionIndicesVec.insert(exclusionIndicesVec.end(), exclusionBlocksForBlock[i].begin(), exclusionBlocksForBlock[i].end());
exclusionRowIndicesVec[i+1] = exclusionIndicesVec.size(); exclusionRowIndicesVec[i+1] = exclusionIndicesVec.size();
} }
maxExclusions = 0;
for (int i = 0; i < (int) exclusionBlocksForBlock.size(); i++)
maxExclusions = (maxExclusions > exclusionBlocksForBlock[i].size() ? maxExclusions : exclusionBlocksForBlock[i].size());
exclusionIndices = OpenCLArray::create<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices"); exclusionIndices = OpenCLArray::create<cl_uint>(context, exclusionIndicesVec.size(), "exclusionIndices");
exclusionRowIndices = OpenCLArray::create<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices"); exclusionRowIndices = OpenCLArray::create<cl_uint>(context, exclusionRowIndicesVec.size(), "exclusionRowIndices");
exclusionIndices->upload(exclusionIndicesVec); exclusionIndices->upload(exclusionIndicesVec);
...@@ -287,80 +294,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -287,80 +294,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
vector<cl_uint> count(1, 0); vector<cl_uint> count(1, 0);
interactionCount->upload(count); interactionCount->upload(count);
} }
// Create kernels.
if (kernelSource.size() > 0)
forceKernel = createInteractionKernel(kernelSource, parameters, arguments, true, true);
if (useCutoff) {
double padding = (usePadding ? 0.1*cutoff : 0.0);
double paddedCutoff = cutoff+padding;
map<string, string> defines;
defines["TILE_SIZE"] = context.intToString(OpenCLContext::TileSize);
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDING"] = context.doubleToString(padding);
defines["PADDED_CUTOFF"] = context.doubleToString(paddedCutoff);
defines["PADDED_CUTOFF_SQUARED"] = context.doubleToString(paddedCutoff*paddedCutoff);
defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(exclusionTiles->getSize());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
defines["SIMD_WIDTH"] = context.intToString(context.getSIMDWidth());
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
int maxExclusions = 0;
for (int i = 0; i < (int) exclusionBlocksForBlock.size(); i++)
maxExclusions = (maxExclusions > exclusionBlocksForBlock[i].size() ? maxExclusions : exclusionBlocksForBlock[i].size());
defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions);
defines["BUFFER_GROUPS"] = (deviceIsCpu ? "4" : "2");
string file = (deviceIsCpu ? OpenCLKernelSources::findInteractingBlocks_cpu : OpenCLKernelSources::findInteractingBlocks);
int groupSize = (deviceIsCpu || context.getSIMDWidth() < 32 ? 32 : 256);
while (true) {
defines["GROUP_SIZE"] = context.intToString(groupSize);
cl::Program interactingBlocksProgram = context.createProgram(file, defines);
findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms());
findBlockBoundsKernel.setArg<cl::Buffer>(6, context.getPosq().getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(7, blockCenter->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(8, blockBoundingBox->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(9, rebuildNeighborList->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(10, sortedBlocks->getDeviceBuffer());
sortBoxDataKernel = cl::Kernel(interactingBlocksProgram, "sortBoxData");
sortBoxDataKernel.setArg<cl::Buffer>(0, sortedBlocks->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(1, blockCenter->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(2, blockBoundingBox->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(3, sortedBlockCenter->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(4, sortedBlockBoundingBox->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(5, context.getPosq().getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(6, oldPositions->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(7, interactionCount->getDeviceBuffer());
sortBoxDataKernel.setArg<cl::Buffer>(8, rebuildNeighborList->getDeviceBuffer());
sortBoxDataKernel.setArg<cl_int>(9, true);
findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(8, context.getPosq().getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(9, interactingTiles->getSize());
findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
findInteractingBlocksKernel.setArg<cl::Buffer>(12, sortedBlocks->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(13, sortedBlockCenter->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(14, sortedBlockBoundingBox->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(15, exclusionIndices->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(16, exclusionRowIndices->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(17, oldPositions->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(18, rebuildNeighborList->getDeviceBuffer());
if (findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) {
// The device can't handle this block size, so reduce it.
groupSize -= 32;
if (groupSize < 32)
throw OpenMMException("Failed to create findInteractingBlocks kernel");
continue;
}
break;
}
interactingBlocksThreadBlockSize = (deviceIsCpu ? 1 : groupSize);
}
} }
static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) { static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
...@@ -380,34 +313,53 @@ static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) ...@@ -380,34 +313,53 @@ static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index)
} }
} }
void OpenCLNonbondedUtilities::prepareInteractions() { double OpenCLNonbondedUtilities::getMaxCutoffDistance() {
double cutoff = 0.0;
for (map<int, double>::const_iterator iter = groupCutoff.begin(); iter != groupCutoff.end(); ++iter)
cutoff = max(cutoff, iter->second);
return cutoff;
}
void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if ((forceGroups&groupFlags) == 0)
return;
if (groupKernels.find(forceGroups) == groupKernels.end())
createKernelsForGroups(forceGroups);
if (!useCutoff) if (!useCutoff)
return; return;
if (numTiles == 0) if (numTiles == 0)
return; return;
KernelSet& kernels = groupKernels[forceGroups];
if (usePeriodic) { if (usePeriodic) {
mm_float4 box = context.getPeriodicBoxSize(); mm_float4 box = context.getPeriodicBoxSize();
double minAllowedSize = 1.999999*cutoff; double minAllowedSize = 1.999999*kernels.cutoffDistance;
if (box.x < minAllowedSize || box.y < minAllowedSize || box.z < minAllowedSize) if (box.x < minAllowedSize || box.y < minAllowedSize || box.z < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff."); throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
} }
// Compute the neighbor list. // Compute the neighbor list.
setPeriodicBoxArgs(context, findBlockBoundsKernel, 1); if (lastCutoff != kernels.cutoffDistance)
context.executeKernel(findBlockBoundsKernel, context.getNumAtoms()); forceRebuildNeighborList = true;
setPeriodicBoxArgs(context, kernels.findBlockBoundsKernel, 1);
context.executeKernel(kernels.findBlockBoundsKernel, context.getNumAtoms());
blockSorter->sort(*sortedBlocks); blockSorter->sort(*sortedBlocks);
context.executeKernel(sortBoxDataKernel, context.getNumAtoms()); kernels.sortBoxDataKernel.setArg<cl_int>(9, forceRebuildNeighborList);
setPeriodicBoxArgs(context, findInteractingBlocksKernel, 0); context.executeKernel(kernels.sortBoxDataKernel, context.getNumAtoms());
context.executeKernel(findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize); setPeriodicBoxArgs(context, kernels.findInteractingBlocksKernel, 0);
sortBoxDataKernel.setArg<cl_int>(9, false); context.executeKernel(kernels.findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
forceRebuildNeighborList = false;
lastCutoff = kernels.cutoffDistance;
} }
void OpenCLNonbondedUtilities::computeInteractions() { void OpenCLNonbondedUtilities::computeInteractions(int forceGroups) {
if (kernelSource.size() > 0) { if ((forceGroups&groupFlags) == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (kernels.hasForces) {
if (useCutoff) if (useCutoff)
setPeriodicBoxArgs(context, forceKernel, 9); setPeriodicBoxArgs(context, kernels.forceKernel, 9);
context.executeKernel(forceKernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); context.executeKernel(kernels.forceKernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
if (context.getComputeForceCount() == 1) if (context.getComputeForceCount() == 1)
updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough. updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
} }
...@@ -434,13 +386,15 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() { ...@@ -434,13 +386,15 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingAtoms = NULL; interactingAtoms = NULL;
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles"); interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer()); for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
forceKernel.setArg<cl_uint>(14, maxTiles); iter->second.forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer()); iter->second.forceKernel.setArg<cl_uint>(14, maxTiles);
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer()); iter->second.forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer()); iter->second.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles); iter->second.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
sortBoxDataKernel.setArg<cl_int>(9, true); iter->second.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
}
forceRebuildNeighborList = true;
} }
void OpenCLNonbondedUtilities::setUsePadding(bool padding) { void OpenCLNonbondedUtilities::setUsePadding(bool padding) {
...@@ -454,18 +408,103 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en ...@@ -454,18 +408,103 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2; int totalTiles = context.getNumAtomBlocks()*(context.getNumAtomBlocks()+1)/2;
startTileIndex = (int) (startFraction*totalTiles);; startTileIndex = (int) (startFraction*totalTiles);;
numTiles = (int) (endFraction*totalTiles)-startTileIndex; numTiles = (int) (endFraction*totalTiles)-startTileIndex;
if (useCutoff && interactingTiles != NULL) { if (useCutoff) {
// We are using a cutoff, and the kernels have already been created. // We are using a cutoff, and the kernels have already been created.
forceKernel.setArg<cl_uint>(5, startTileIndex); for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
forceKernel.setArg<cl_uint>(6, numTiles); iter->second.forceKernel.setArg<cl_uint>(5, startTileIndex);
findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex); iter->second.forceKernel.setArg<cl_uint>(6, numTiles);
findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks); iter->second.findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
sortBoxDataKernel.setArg<cl_int>(9, true); iter->second.findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
}
forceRebuildNeighborList = true;
} }
} }
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric) const { void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
KernelSet kernels;
double cutoff = 0.0;
string source;
for (int i = 0; i < 32; i++) {
if ((groups&(1<<i)) != 0) {
cutoff = max(cutoff, groupCutoff[i]);
source += groupKernelSource[i];
}
}
kernels.hasForces = (source.size() > 0);
kernels.cutoffDistance = cutoff;
if (kernels.hasForces)
kernels.forceKernel = createInteractionKernel(source, parameters, arguments, true, true, groups);
if (useCutoff && kernels.hasForces) {
double padding = (usePadding ? 0.1*cutoff : 0.0);
double paddedCutoff = cutoff+padding;
map<string, string> defines;
defines["TILE_SIZE"] = context.intToString(OpenCLContext::TileSize);
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDING"] = context.doubleToString(padding);
defines["PADDED_CUTOFF"] = context.doubleToString(paddedCutoff);
defines["PADDED_CUTOFF_SQUARED"] = context.doubleToString(paddedCutoff*paddedCutoff);
defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(exclusionTiles->getSize());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
defines["SIMD_WIDTH"] = context.intToString(context.getSIMDWidth());
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions);
defines["BUFFER_GROUPS"] = (deviceIsCpu ? "4" : "2");
string file = (deviceIsCpu ? OpenCLKernelSources::findInteractingBlocks_cpu : OpenCLKernelSources::findInteractingBlocks);
int groupSize = (deviceIsCpu || context.getSIMDWidth() < 32 ? 32 : 256);
while (true) {
defines["GROUP_SIZE"] = context.intToString(groupSize);
cl::Program interactingBlocksProgram = context.createProgram(file, defines);
kernels.findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
kernels.findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(6, context.getPosq().getDeviceBuffer());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(7, blockCenter->getDeviceBuffer());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(8, blockBoundingBox->getDeviceBuffer());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(9, rebuildNeighborList->getDeviceBuffer());
kernels.findBlockBoundsKernel.setArg<cl::Buffer>(10, sortedBlocks->getDeviceBuffer());
kernels.sortBoxDataKernel = cl::Kernel(interactingBlocksProgram, "sortBoxData");
kernels.sortBoxDataKernel.setArg<cl::Buffer>(0, sortedBlocks->getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(1, blockCenter->getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(2, blockBoundingBox->getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(3, sortedBlockCenter->getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(4, sortedBlockBoundingBox->getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(5, context.getPosq().getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(6, oldPositions->getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(7, interactionCount->getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl::Buffer>(8, rebuildNeighborList->getDeviceBuffer());
kernels.sortBoxDataKernel.setArg<cl_int>(9, true);
kernels.findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(5, interactionCount->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(8, context.getPosq().getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, interactingTiles->getSize());
kernels.findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
kernels.findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(12, sortedBlocks->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(13, sortedBlockCenter->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(14, sortedBlockBoundingBox->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(15, exclusionIndices->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(16, exclusionRowIndices->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(17, oldPositions->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(18, rebuildNeighborList->getDeviceBuffer());
if (kernels.findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) {
// The device can't handle this block size, so reduce it.
groupSize -= 32;
if (groupSize < 32)
throw OpenMMException("Failed to create findInteractingBlocks kernel");
continue;
}
break;
}
interactingBlocksThreadBlockSize = (deviceIsCpu ? 1 : groupSize);
}
groupKernels[groups] = kernels;
}
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups) {
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source; replacements["COMPUTE_INTERACTION"] = source;
const string suffixes[] = {"x", "y", "z", "w"}; const string suffixes[] = {"x", "y", "z", "w"};
...@@ -565,8 +604,16 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -565,8 +604,16 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
if (useCutoff && context.getSIMDWidth() < 32) if (useCutoff && context.getSIMDWidth() < 32)
defines["PRUNE_BY_CUTOFF"] = "1"; defines["PRUNE_BY_CUTOFF"] = "1";
defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize); defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize);
defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff); double maxCutoff = 0.0;
defines["CUTOFF"] = context.doubleToString(cutoff); for (int i = 0; i < 32; i++) {
if ((groups&(1<<i)) != 0) {
double cutoff = groupCutoff[i];
maxCutoff = max(maxCutoff, cutoff);
defines["CUTOFF_"+context.intToString(i)+"_SQUARED"] = context.doubleToString(cutoff*cutoff);
defines["CUTOFF_"+context.intToString(i)] = context.doubleToString(cutoff);
}
}
defines["MAX_CUTOFF"] = context.doubleToString(maxCutoff);
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms()); defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
......
...@@ -220,9 +220,9 @@ __kernel void computeNonbonded( ...@@ -220,9 +220,9 @@ __kernel void computeNonbonded(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
x = tiles[pos]; x = tiles[pos];
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); 0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
} }
else else
#endif #endif
......
...@@ -973,6 +973,62 @@ void testInteractionGroupLongRangeCorrection() { ...@@ -973,6 +973,62 @@ void testInteractionGroupLongRangeCorrection() {
ASSERT_EQUAL_TOL(expected, energy2-energy1, 1e-4); ASSERT_EQUAL_TOL(expected, energy2-energy1, 1e-4);
} }
void testMultipleCutoffs() {
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
// Add multiple nonbonded forces that have different cutoffs.
CustomNonbondedForce* nonbonded1 = new CustomNonbondedForce("2*r");
nonbonded1->addParticle(vector<double>());
nonbonded1->addParticle(vector<double>());
nonbonded1->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
nonbonded1->setCutoffDistance(2.5);
system.addForce(nonbonded1);
CustomNonbondedForce* nonbonded2 = new CustomNonbondedForce("3*r");
nonbonded2->addParticle(vector<double>());
nonbonded2->addParticle(vector<double>());
nonbonded2->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
nonbonded2->setCutoffDistance(2.9);
nonbonded2->setForceGroup(1);
system.addForce(nonbonded2);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(0, 0, 0);
for (double r = 2.4; r < 3.2; r += 0.2) {
positions[1][1] = r;
context.setPositions(positions);
double e1 = (r < 2.5 ? 2.0*r : 0.0);
double e2 = (r < 2.9 ? 3.0*r : 0.0);
double f1 = (r < 2.5 ? 2.0 : 0.0);
double f2 = (r < 2.9 ? 3.0 : 0.0);
// Check the first force.
State state = context.getState(State::Forces | State::Energy, false, 1);
ASSERT_EQUAL_VEC(Vec3(0, f1, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f1, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e1, state.getPotentialEnergy(), TOL);
// Check the second force.
state = context.getState(State::Forces | State::Energy, false, 2);
ASSERT_EQUAL_VEC(Vec3(0, f2, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f2, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e2, state.getPotentialEnergy(), TOL);
// Check the sum of both forces.
state = context.getState(State::Forces | State::Energy);
ASSERT_EQUAL_VEC(Vec3(0, f1+f2, 0), state.getForces()[0], TOL);
ASSERT_EQUAL_VEC(Vec3(0, -f1-f2, 0), state.getForces()[1], TOL);
ASSERT_EQUAL_TOL(e1+e2, state.getPotentialEnergy(), TOL);
}
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (argc > 1) if (argc > 1)
...@@ -997,6 +1053,7 @@ int main(int argc, char* argv[]) { ...@@ -997,6 +1053,7 @@ int main(int argc, char* argv[]) {
testInteractionGroups(); testInteractionGroups();
testLargeInteractionGroup(); testLargeInteractionGroup();
testInteractionGroupLongRangeCorrection(); testInteractionGroupLongRangeCorrection();
testMultipleCutoffs();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment