Commit 54c0ca3f authored by peastman's avatar peastman
Browse files

Began creating OpenCL implementation of CustomNonbondedForce interaction groups

parent aba74fd8
...@@ -2072,7 +2072,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo ...@@ -2072,7 +2072,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
// Find how many tiles we will create for this group. // Find how many tiles we will create for this group.
int tileWidth = min(32, (int) atoms2.size()); int tileWidth = min(min(32, (int) atoms1.size()), (int) atoms2.size());
int numBlocks1 = (atoms1.size()+tileWidth-1)/tileWidth; int numBlocks1 = (atoms1.size()+tileWidth-1)/tileWidth;
int numBlocks2 = (atoms2.size()+tileWidth-1)/tileWidth; int numBlocks2 = (atoms2.size()+tileWidth-1)/tileWidth;
......
...@@ -615,7 +615,7 @@ void testLargeInteractionGroup() { ...@@ -615,7 +615,7 @@ void testLargeInteractionGroup() {
positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]); positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
nonbonded->addExclusion(2*i, 2*i+1); nonbonded->addExclusion(2*i, 2*i+1);
} }
nonbonded->setNonbondedMethod(CustomNonbondedForce::NoCutoff); nonbonded->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
system.addForce(nonbonded); system.addForce(nonbonded);
// Compute the forces. // Compute the forces.
......
...@@ -639,7 +639,7 @@ private: ...@@ -639,7 +639,7 @@ private:
class OpenCLCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel { class OpenCLCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public: public:
OpenCLCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomNonbondedForceKernel(name, platform), OpenCLCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcCustomNonbondedForceKernel(name, platform),
cl(cl), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), forceCopy(NULL), system(system) { cl(cl), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), interactionGroupData(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) {
} }
~OpenCLCalcCustomNonbondedForceKernel(); ~OpenCLCalcCustomNonbondedForceKernel();
/** /**
...@@ -666,15 +666,20 @@ public: ...@@ -666,15 +666,20 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force); void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private: private:
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource);
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLArray* globals; OpenCLArray* globals;
OpenCLArray* tabulatedFunctionParams; OpenCLArray* tabulatedFunctionParams;
OpenCLArray* interactionGroupData;
cl::Kernel interactionGroupKernel;
std::vector<void*> interactionGroupArgs;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues; std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray*> tabulatedFunctions; std::vector<OpenCLArray*> tabulatedFunctions;
double longRangeCoefficient; double longRangeCoefficient;
bool hasInitializedLongRangeCorrection; bool hasInitializedLongRangeCorrection, hasInitializedKernel;
int numGroupThreadBlocks;
CustomNonbondedForce* forceCopy; CustomNonbondedForce* forceCopy;
const System& system; const System& system;
}; };
......
...@@ -81,6 +81,13 @@ public: ...@@ -81,6 +81,13 @@ public:
*/ */
template <class T> template <class T>
void setParameterValues(const std::vector<std::vector<T> >& values); void setParameterValues(const std::vector<std::vector<T> >& values);
/**
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data.
*/
std::vector<OpenCLNonbondedUtilities::ParameterInfo>& getBuffers() {
return buffers;
}
/** /**
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers * Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data. * containing the data.
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include "lepton/ParsedExpression.h" #include "lepton/ParsedExpression.h"
#include "SimTKOpenMMRealType.h" #include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h" #include "SimTKOpenMMUtilities.h"
#include <algorithm>
#include <cmath> #include <cmath>
#include <set> #include <set>
...@@ -1875,6 +1876,17 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex ...@@ -1875,6 +1876,17 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo { class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo {
public: public:
OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) { OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
if (force.getNumInteractionGroups() > 0) {
groupsForParticle.resize(force.getNumParticles());
for (int i = 0; i < force.getNumInteractionGroups(); i++) {
set<int> set1, set2;
force.getInteractionGroupParameters(i, set1, set2);
for (set<int>::const_iterator iter = set1.begin(); iter != set1.end(); ++iter)
groupsForParticle[*iter].insert(2*i);
for (set<int>::const_iterator iter = set2.begin(); iter != set2.end(); ++iter)
groupsForParticle[*iter].insert(2*i+1);
}
}
} }
bool areParticlesIdentical(int particle1, int particle2) { bool areParticlesIdentical(int particle1, int particle2) {
vector<double> params1; vector<double> params1;
...@@ -1884,6 +1896,8 @@ public: ...@@ -1884,6 +1896,8 @@ public:
for (int i = 0; i < (int) params1.size(); i++) for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i]) if (params1[i] != params2[i])
return false; return false;
if (groupsForParticle.size() > 0 && groupsForParticle[particle1] != groupsForParticle[particle2])
return false;
return true; return true;
} }
int getNumParticleGroups() { int getNumParticleGroups() {
...@@ -1901,6 +1915,7 @@ public: ...@@ -1901,6 +1915,7 @@ public:
} }
private: private:
const CustomNonbondedForce& force; const CustomNonbondedForce& force;
vector<set<int> > groupsForParticle;
}; };
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() { OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
...@@ -1910,6 +1925,8 @@ OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() { ...@@ -1910,6 +1925,8 @@ OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
delete globals; delete globals;
if (tabulatedFunctionParams != NULL) if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams; delete tabulatedFunctionParams;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i]; delete tabulatedFunctions[i];
if (forceCopy != NULL) if (forceCopy != NULL)
...@@ -1920,7 +1937,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -1920,7 +1937,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int forceIndex; int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex) for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
; ;
string prefix = "custom"+cl.intToString(forceIndex)+"_"; string prefix = (force.getNumInteractionGroups() == 0 ? "custom"+cl.intToString(forceIndex)+"_" : "");
// Record parameters and exclusions. // Record parameters and exclusions.
...@@ -2021,6 +2038,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -2021,6 +2038,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
replacements["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0)); replacements["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
} }
string source = cl.replaceStrings(OpenCLKernelSources::customNonbonded, replacements); string source = cl.replaceStrings(OpenCLKernelSources::customNonbonded, replacements);
if (force.getNumInteractionGroups() > 0)
initInteractionGroups(force, source);
else {
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup()); cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
...@@ -2030,6 +2050,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -2030,6 +2050,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
globals->upload(globalParamValues); globals->upload(globalParamValues);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer())); cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer()));
} }
}
cl.addForce(new OpenCLCustomNonbondedForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force)); cl.addForce(new OpenCLCustomNonbondedForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
// Record information for the long range correction. // Record information for the long range correction.
...@@ -2044,6 +2065,215 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -2044,6 +2065,215 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
} }
} }
void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbondedForce& force, const string& interactionSource) {
// Process groups to form tiles.
vector<vector<int> > atomLists;
vector<pair<int, int> > tiles;
for (int group = 0; group < force.getNumInteractionGroups(); group++) {
// Get the list of atoms in this group and sort them.
set<int> set1, set2;
force.getInteractionGroupParameters(group, set1, set2);
vector<int> atoms1, atoms2;
atoms1.insert(atoms1.begin(), set1.begin(), set1.end());
atoms2.insert(atoms2.begin(), set2.begin(), set2.end());
sort(atoms1.begin(), atoms1.end());
sort(atoms2.begin(), atoms2.end());
// Find how many tiles we will create for this group.
int tileWidth = min(min(32, (int) atoms1.size()), (int) atoms2.size());
int numBlocks1 = (atoms1.size()+tileWidth-1)/tileWidth;
int numBlocks2 = (atoms2.size()+tileWidth-1)/tileWidth;
// Add the tiles.
for (int i = 0; i < numBlocks1; i++)
for (int j = 0; j < numBlocks2; j++)
tiles.push_back(make_pair(atomLists.size()+i, atomLists.size()+numBlocks1+j));
// Add the atom lists.
for (int i = 0; i < numBlocks1; i++) {
vector<int> atoms;
int first = i*tileWidth;
int last = min((i+1)*tileWidth, (int) atoms1.size());
for (int j = first; j < last; j++)
atoms.push_back(atoms1[j]);
atomLists.push_back(atoms);
}
for (int i = 0; i < numBlocks2; i++) {
vector<int> atoms;
int first = i*tileWidth;
int last = min((i+1)*tileWidth, (int) atoms2.size());
for (int j = first; j < last; j++)
atoms.push_back(atoms2[j]);
atomLists.push_back(atoms);
}
}
// Build a lookup table for quickly identifying excluded interactions.
set<pair<int, int> > exclusions;
for (int i = 0; i < force.getNumExclusions(); i++) {
int p1, p2;
force.getExclusionParticles(i, p1, p2);
exclusions.insert(make_pair(p1, p2));
}
// Build the exclusion flags for each tile. While we're at it, filter out tiles
// where all interactions are excluded, and sort the tiles by size.
vector<vector<int> > exclusionFlags(tiles.size());
vector<pair<int, int> > tileOrder;
for (int tile = 0; tile < tiles.size(); tile++) {
if (atomLists[tiles[tile].first].size() < atomLists[tiles[tile].second].size()) {
// For efficiency, we want the first axis to be the larger one.
int swap = tiles[tile].first;
tiles[tile].first = tiles[tile].second;
tiles[tile].second = swap;
}
vector<int>& atoms1 = atomLists[tiles[tile].first];
vector<int>& atoms2 = atomLists[tiles[tile].second];
vector<int> flags(atoms1.size(), (1<<atoms2.size())-1);
int numExcluded = 0;
for (int i = 0; i < (int) atoms1.size(); i++)
for (int j = 0; j < (int) atoms2.size(); j++) {
int a1 = atoms1[i];
int a2 = atoms2[j];
if (a1 == a2 || exclusions.find(make_pair(a1, a2)) != exclusions.end() || exclusions.find(make_pair(a2, a1)) != exclusions.end()) {
flags[i] &= -1-(1<<j);
numExcluded++;
}
}
if (numExcluded == atoms1.size()*atoms2.size())
continue; // All interactions are excluded.
tileOrder.push_back(make_pair((int) -atoms2.size(), tile));
if (numExcluded > 0)
exclusionFlags[tile] = flags;
}
sort(tileOrder.begin(), tileOrder.end());
// Merge tiles to get as close as possible to 32 along the first axis of each one.
vector<int> tileSetStart;
tileSetStart.push_back(0);
int tileSetSize = 0;
for (int i = 0; i < tileOrder.size(); i++) {
int tile = tileOrder[i].second;
int size = atomLists[tiles[tile].first].size();
if (tileSetSize+size > 32) {
tileSetStart.push_back(i);
tileSetSize = 0;
}
tileSetSize += size;
}
tileSetStart.push_back(tileOrder.size());
// Build the data structures.
int numTileSets = tileSetStart.size()-1;
vector<mm_int4> groupData;
for (int tileSet = 0; tileSet < numTileSets; tileSet++) {
int indexInTileSet = 0;
for (int i = tileSetStart[tileSet]; i < tileSetStart[tileSet+1]; i++) {
int tile = tileOrder[i].second;
vector<int>& atoms1 = atomLists[tiles[tile].first];
vector<int>& atoms2 = atomLists[tiles[tile].second];
int range = indexInTileSet + ((indexInTileSet+atoms1.size())<<16);
int allFlags = (1<<atoms2.size())-1;
for (int j = 0; j < (int) atoms1.size(); j++) {
int a1 = atoms1[j];
int a2 = (j < atoms2.size() ? atoms2[j] : 0);
int flags = (exclusionFlags[tile].size() > 0 ? exclusionFlags[tile][j] : allFlags);
groupData.push_back(mm_int4(a1, a2, range, flags<<indexInTileSet));
}
indexInTileSet += atoms1.size();
}
for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(mm_int4(0, 0, 0, 0));
}
interactionGroupData = OpenCLArray::create<mm_int4>(cl, groupData.size(), "interactionGroupData");
interactionGroupData->upload(groupData);
// Create the kernel.
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = interactionSource;
const string suffixes[] = {"x", "y", "z", "w"};
stringstream localData;
int localDataSize = 0;
vector<OpenCLNonbondedUtilities::ParameterInfo>& buffers = params->getBuffers();
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getNumComponents() == 1)
localData<<buffers[i].getComponentType()<<" params"<<(i+1)<<";\n";
else {
for (int j = 0; j < buffers[i].getNumComponents(); ++j)
localData<<buffers[i].getComponentType()<<" params"<<(i+1)<<"_"<<suffixes[j]<<";\n";
}
localDataSize += buffers[i].getSize();
}
replacements["ATOM_PARAMETER_DATA"] = localData.str();
stringstream args;
for (int i = 0; i < (int) buffers.size(); i++)
args<<", __global const "<<buffers[i].getType()<<"* restrict global_params"<<(i+1);
if (globals != NULL)
args<<", __global const float* restrict globals";
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream load1;
for (int i = 0; i < (int) buffers.size(); i++)
load1<<buffers[i].getType()<<" params"<<(i+1)<<"1 = global_params"<<(i+1)<<"[atom1];\n";
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
stringstream loadLocal2;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getNumComponents() == 1)
loadLocal2<<"localData[get_local_id(0)].params"<<(i+1)<<" = global_params"<<(i+1)<<"[atom2];\n";
else {
loadLocal2<<buffers[i].getType()<<" temp_params"<<(i+1)<<" = global_params"<<(i+1)<<"[atom2];\n";
for (int j = 0; j < buffers[i].getNumComponents(); ++j)
loadLocal2<<"localData[get_local_id(0)].params"<<(i+1)<<"_"<<suffixes[j]<<" = temp_params"<<(i+1)<<"."<<suffixes[j]<<";\n";
}
}
replacements["LOAD_LOCAL_PARAMETERS"] = loadLocal2.str();
stringstream load2;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getNumComponents() == 1)
load2<<buffers[i].getType()<<" params"<<(i+1)<<"2 = localData[localIndex].params"<<(i+1)<<";\n";
else {
load2<<buffers[i].getType()<<" params"<<(i+1)<<"2 = make_"<<buffers[i].getType()<<"(";
for (int j = 0; j < buffers[i].getNumComponents(); ++j) {
if (j > 0)
load2<<", ";
load2<<"localData[localIndex].params"<<(i+1)<<"_"<<suffixes[j];
}
load2<<");\n";
}
}
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
map<string, string> defines;
if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
defines["USE_CUTOFF"] = "1";
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
defines["USE_PERIODIC"] = "1";
defines["THREAD_BLOCK_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize());
double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
defines["TILE_SIZE"] = "32";
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*numTileSets/numContexts;
int endIndex = (cl.getContextIndex()+1)*numTileSets/numContexts;
defines["FIRST_TILE"] = cl.intToString(startIndex);
defines["LAST_TILE"] = cl.intToString(endIndex);
if ((localDataSize/4)%2 == 0 && !cl.getUseDoublePrecision())
defines["PARAMETER_SIZE_IS_EVEN"] = "1";
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customNonbondedGroups, replacements), defines);
interactionGroupKernel = cl::Kernel(program, "computeInteractionGroups");
numGroupThreadBlocks = cl.getNonbondedUtilities().getNumForceThreadBlocks();
}
double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals != NULL) {
bool changed = false; bool changed = false;
...@@ -2065,6 +2295,24 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool ...@@ -2065,6 +2295,24 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner()); longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner());
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
} }
if (interactionGroupData != NULL) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
int index = 0;
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData->getDeviceBuffer());
setPeriodicBoxSizeArg(cl, interactionGroupKernel, index++);
setInvPeriodicBoxSizeArg(cl, interactionGroupKernel, index++);
for (int i = 0; i < (int) params->getBuffers().size(); i++)
interactionGroupKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
if (globals != NULL)
interactionGroupKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
}
int forceThreadBlockSize = cl.getNonbondedUtilities().getForceThreadBlockSize();
cl.executeKernel(interactionGroupKernel, numGroupThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble(); mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
return longRangeCoefficient/(boxSize.x*boxSize.y*boxSize.z); return longRangeCoefficient/(boxSize.x*boxSize.y*boxSize.z);
} }
......
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct {
real x, y, z;
real q;
real fx, fy, fz;
ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
real padding;
#endif
} AtomData;
__kernel void computeInteractionGroups(
__global long* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq,
__global const int4* restrict groupData, real4 periodicBoxSize, real4 invPeriodicBoxSize
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex
real energy = 0.0f;
__local AtomData localData[THREAD_BLOCK_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
for (int tile = startTile; tile < endTile; tile++) {
const int4 atomData = groupData[TILE_SIZE*tile+tgx];
const int atom1 = atomData.x;
const int atom2 = atomData.y;
const int rangeStart = atomData.z&0xFFFF;
const int rangeEnd = (atomData.z>>16)&0xFFFF;
const int exclusions = atomData.w;
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
real4 force = (real4) (0);
real4 posq2 = posq[atom2];
localData[get_local_id(0)].x = posq2.x;
localData[get_local_id(0)].y = posq2.y;
localData[get_local_id(0)].z = posq2.z;
localData[get_local_id(0)].q = posq2.w;
LOAD_LOCAL_PARAMETERS
localData[get_local_id(0)].fx = 0.0f;
localData[get_local_id(0)].fy = 0.0f;
localData[get_local_id(0)].fz = 0.0f;
int tj = tgx;
SYNC_WARPS;
for (int j = rangeStart; j < rangeEnd; j++) {
bool isExcluded = (((exclusions>>tj)&1) == 0);
int localIndex = tbx+tj;
posq2 = (real4) (localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (!isExcluded && r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
real dEdR = 0.0f;
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
delta *= dEdR;
force.xyz -= delta.xyz;
localData[localIndex].fx += delta.x;
localData[localIndex].fy += delta.y;
localData[localIndex].fz += delta.z;
#ifdef USE_CUTOFF
}
#endif
tj = (tj == rangeEnd-1 ? rangeStart : tj+1);
SYNC_WARPS;
}
if (exclusions != 0) {
atom_add(&forceBuffers[atom1], (long) (force.x*0x100000000));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
atom_add(&forceBuffers[atom2], (long) (localData[get_local_id(0)].fx*0x100000000));
atom_add(&forceBuffers[atom2+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000));
atom_add(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000));
}
}
energyBuffer[get_global_id(0)] += energy;
}
\ No newline at end of file
...@@ -538,6 +538,109 @@ void testLongRangeCorrection() { ...@@ -538,6 +538,109 @@ void testLongRangeCorrection() {
ASSERT_EQUAL_TOL(standardEnergy1-standardEnergy2, customEnergy1-customEnergy2, 1e-4); ASSERT_EQUAL_TOL(standardEnergy1-standardEnergy2, customEnergy1-customEnergy2, 1e-4);
} }
void testInteractionGroups() {
const int numParticles = 6;
System system;
VerletIntegrator integrator(0.01);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("v1+v2");
nonbonded->addPerParticleParameter("v");
vector<double> params(1, 0.001);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
nonbonded->addParticle(params);
params[0] *= 10;
}
set<int> set1, set2, set3, set4;
set1.insert(2);
set2.insert(0);
set2.insert(1);
set2.insert(2);
set2.insert(3);
set2.insert(4);
set2.insert(5);
nonbonded->addInteractionGroup(set1, set2); // Particle 2 interacts with every other particle.
set3.insert(0);
set3.insert(1);
set4.insert(4);
set4.insert(5);
nonbonded->addInteractionGroup(set3, set4); // Particles 0 and 1 interact with 4 and 5.
nonbonded->addExclusion(1, 2); // Add an exclusion to make sure it gets skipped.
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
context.setPositions(positions);
State state = context.getState(State::Energy);
double expectedEnergy = 331.423; // Each digit is the number of interactions a particle particle is involved in.
ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), TOL);
}
void testLargeInteractionGroup() {
const int numMolecules = 300;
const int numParticles = numMolecules*2;
const double boxSize = 20.0;
// Create a large system.
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("4*eps*((sigma/r)^12-(sigma/r)^6)+138.935456*q/r; q=q1*q2; sigma=0.5*(sigma1+sigma2); eps=sqrt(eps1*eps2)");
nonbonded->addPerParticleParameter("q");
nonbonded->addPerParticleParameter("sigma");
nonbonded->addPerParticleParameter("eps");
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<double> params(3);
for (int i = 0; i < numMolecules; i++) {
if (i < numMolecules/2) {
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.1;
nonbonded->addParticle(params);
params[0] = -1.0;
params[1] = 0.1;
nonbonded->addParticle(params);
}
else {
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.2;
nonbonded->addParticle(params);
params[0] = -1.0;
params[1] = 0.1;
nonbonded->addParticle(params);
}
positions[2*i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
nonbonded->addExclusion(2*i, 2*i+1);
}
nonbonded->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
system.addForce(nonbonded);
// Compute the forces.
VerletIntegrator integrator(0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
State state1 = context.getState(State::Forces);
// Modify the force so only one particle interacts with everything else.
set<int> set1, set2;
set1.insert(151);
for (int i = 0; i < numParticles; i++)
set2.insert(i);
nonbonded->addInteractionGroup(set1, set2);
context.reinitialize();
context.setPositions(positions);
State state2 = context.getState(State::Forces);
// The force on that one particle should be the same.
ASSERT_EQUAL_VEC(state1.getForces()[151], state2.getForces()[151], 1e-4);
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (argc > 1) if (argc > 1)
...@@ -553,6 +656,8 @@ int main(int argc, char* argv[]) { ...@@ -553,6 +656,8 @@ int main(int argc, char* argv[]) {
testParallelComputation(); testParallelComputation();
testSwitchingFunction(); testSwitchingFunction();
testLongRangeCorrection(); testLongRangeCorrection();
testInteractionGroups();
testLargeInteractionGroup();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment