Commit 09611266 authored by peastman's avatar peastman
Browse files

Created CUDA implementation of restricting CustomNonbondedForce to selected interaction groups

parent 9840ca70
...@@ -638,7 +638,7 @@ private: ...@@ -638,7 +638,7 @@ private:
class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel { class CudaCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public: public:
CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomNonbondedForceKernel(name, platform), CudaCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcCustomNonbondedForceKernel(name, platform),
cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), forceCopy(NULL), system(system) { cu(cu), params(NULL), globals(NULL), tabulatedFunctionParams(NULL), interactionGroupData(NULL), forceCopy(NULL), system(system), hasInitializedKernel(false) {
} }
~CudaCalcCustomNonbondedForceKernel(); ~CudaCalcCustomNonbondedForceKernel();
/** /**
...@@ -665,15 +665,20 @@ public: ...@@ -665,15 +665,20 @@ public:
*/ */
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force); void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private: private:
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource);
CudaContext& cu; CudaContext& cu;
CudaParameterSet* params; CudaParameterSet* params;
CudaArray* globals; CudaArray* globals;
CudaArray* tabulatedFunctionParams; CudaArray* tabulatedFunctionParams;
CudaArray* interactionGroupData;
CUfunction interactionGroupKernel;
std::vector<void*> interactionGroupArgs;
std::vector<std::string> globalParamNames; std::vector<std::string> globalParamNames;
std::vector<float> globalParamValues; std::vector<float> globalParamValues;
std::vector<CudaArray*> tabulatedFunctions; std::vector<CudaArray*> tabulatedFunctions;
double longRangeCoefficient; double longRangeCoefficient;
bool hasInitializedLongRangeCorrection; bool hasInitializedLongRangeCorrection, hasInitializedKernel;
int numGroupThreadBlocks;
CustomNonbondedForce* forceCopy; CustomNonbondedForce* forceCopy;
const System& system; const System& system;
}; };
......
...@@ -1862,6 +1862,17 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -1862,6 +1862,17 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
class CudaCustomNonbondedForceInfo : public CudaForceInfo { class CudaCustomNonbondedForceInfo : public CudaForceInfo {
public: public:
CudaCustomNonbondedForceInfo(const CustomNonbondedForce& force) : force(force) { CudaCustomNonbondedForceInfo(const CustomNonbondedForce& force) : force(force) {
if (force.getNumInteractionGroups() > 0) {
groupsForParticle.resize(force.getNumParticles());
for (int i = 0; i < force.getNumInteractionGroups(); i++) {
set<int> set1, set2;
force.getInteractionGroupParameters(i, set1, set2);
for (set<int>::const_iterator iter = set1.begin(); iter != set1.end(); ++iter)
groupsForParticle[*iter].insert(2*i);
for (set<int>::const_iterator iter = set2.begin(); iter != set2.end(); ++iter)
groupsForParticle[*iter].insert(2*i+1);
}
}
} }
bool areParticlesIdentical(int particle1, int particle2) { bool areParticlesIdentical(int particle1, int particle2) {
vector<double> params1; vector<double> params1;
...@@ -1871,6 +1882,8 @@ public: ...@@ -1871,6 +1882,8 @@ public:
for (int i = 0; i < (int) params1.size(); i++) for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i]) if (params1[i] != params2[i])
return false; return false;
if (groupsForParticle.size() > 0 && groupsForParticle[particle1] != groupsForParticle[particle2])
return false;
return true; return true;
} }
int getNumParticleGroups() { int getNumParticleGroups() {
...@@ -1888,6 +1901,7 @@ public: ...@@ -1888,6 +1901,7 @@ public:
} }
private: private:
const CustomNonbondedForce& force; const CustomNonbondedForce& force;
vector<set<int> > groupsForParticle;
}; };
CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() { CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
...@@ -1898,6 +1912,8 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() { ...@@ -1898,6 +1912,8 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
delete globals; delete globals;
if (tabulatedFunctionParams != NULL) if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams; delete tabulatedFunctionParams;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i]; delete tabulatedFunctions[i];
if (forceCopy != NULL) if (forceCopy != NULL)
...@@ -1909,7 +1925,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -1909,7 +1925,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
int forceIndex; int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex) for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
; ;
string prefix = "custom"+cu.intToString(forceIndex)+"_"; string prefix = (force.getNumInteractionGroups() == 0 ? "custom"+cu.intToString(forceIndex)+"_" : "");
// Record parameters and exclusions. // Record parameters and exclusions.
...@@ -2010,6 +2026,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2010,6 +2026,9 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
replacements["SWITCH_C5"] = cu.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0)); replacements["SWITCH_C5"] = cu.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
} }
string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements); string source = cu.replaceStrings(CudaKernelSources::customNonbonded, replacements);
if (force.getNumInteractionGroups() > 0)
initInteractionGroups(force, source);
else {
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup()); cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; CudaNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
...@@ -2019,6 +2038,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2019,6 +2038,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
globals->upload(globalParamValues); globals->upload(globalParamValues);
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer())); cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
} }
}
cu.addForce(new CudaCustomNonbondedForceInfo(force)); cu.addForce(new CudaCustomNonbondedForceInfo(force));
// Record information for the long range correction. // Record information for the long range correction.
...@@ -2033,6 +2053,215 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const ...@@ -2033,6 +2053,215 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
} }
} }
void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbondedForce& force, const string& interactionSource) {
// Process groups to form tiles.
vector<vector<int> > atomLists;
vector<pair<int, int> > tiles;
for (int group = 0; group < force.getNumInteractionGroups(); group++) {
// Get the list of atoms in this group and sort them.
set<int> set1, set2;
force.getInteractionGroupParameters(group, set1, set2);
vector<int> atoms1, atoms2;
atoms1.insert(atoms1.begin(), set1.begin(), set1.end());
atoms2.insert(atoms2.begin(), set2.begin(), set2.end());
sort(atoms1.begin(), atoms1.end());
sort(atoms2.begin(), atoms2.end());
// Find how many tiles we will create for this group.
int tileWidth = min(32, (int) atoms2.size());
int numBlocks1 = (atoms1.size()+tileWidth-1)/tileWidth;
int numBlocks2 = (atoms2.size()+tileWidth-1)/tileWidth;
// Add the tiles.
for (int i = 0; i < numBlocks1; i++)
for (int j = 0; j < numBlocks2; j++)
tiles.push_back(make_pair(atomLists.size()+i, atomLists.size()+numBlocks1+j));
// Add the atom lists.
for (int i = 0; i < numBlocks1; i++) {
vector<int> atoms;
int first = i*tileWidth;
int last = min((i+1)*tileWidth, (int) atoms1.size());
for (int j = first; j < last; j++)
atoms.push_back(atoms1[j]);
atomLists.push_back(atoms);
}
for (int i = 0; i < numBlocks2; i++) {
vector<int> atoms;
int first = i*tileWidth;
int last = min((i+1)*tileWidth, (int) atoms2.size());
for (int j = first; j < last; j++)
atoms.push_back(atoms2[j]);
atomLists.push_back(atoms);
}
}
// Build a lookup table for quickly identifying excluded interactions.
set<pair<int, int> > exclusions;
for (int i = 0; i < force.getNumExclusions(); i++) {
int p1, p2;
force.getExclusionParticles(i, p1, p2);
exclusions.insert(make_pair(p1, p2));
}
// Build the exclusion flags for each tile. While we're at it, filter out tiles
// where all interactions are excluded, and sort the tiles by size.
vector<vector<int> > exclusionFlags(tiles.size());
vector<pair<int, int> > tileOrder;
for (int tile = 0; tile < tiles.size(); tile++) {
if (atomLists[tiles[tile].first].size() < atomLists[tiles[tile].second].size()) {
// For efficiency, we want the first axis to be the larger one.
int swap = tiles[tile].first;
tiles[tile].first = tiles[tile].second;
tiles[tile].second = swap;
}
vector<int>& atoms1 = atomLists[tiles[tile].first];
vector<int>& atoms2 = atomLists[tiles[tile].second];
vector<int> flags(atoms1.size(), (1<<atoms2.size())-1);
int numExcluded = 0;
for (int i = 0; i < (int) atoms1.size(); i++)
for (int j = 0; j < (int) atoms2.size(); j++) {
int a1 = atoms1[i];
int a2 = atoms2[j];
if (a1 == a2 || exclusions.find(make_pair(a1, a2)) != exclusions.end() || exclusions.find(make_pair(a2, a1)) != exclusions.end()) {
flags[i] &= -1-(1<<j);
numExcluded++;
}
}
if (numExcluded == atoms1.size()*atoms2.size())
continue; // All interactions are excluded.
tileOrder.push_back(make_pair((int) -atoms2.size(), tile));
if (numExcluded > 0)
exclusionFlags[tile] = flags;
}
sort(tileOrder.begin(), tileOrder.end());
// Merge tiles to get as close as possible to 32 along the first axis of each one.
vector<int> tileSetStart;
tileSetStart.push_back(0);
int tileSetSize = 0;
for (int i = 0; i < tileOrder.size(); i++) {
int tile = tileOrder[i].second;
int size = atomLists[tiles[tile].first].size();
if (tileSetSize+size > 32) {
tileSetStart.push_back(i);
tileSetSize = 0;
}
tileSetSize += size;
}
tileSetStart.push_back(tileOrder.size());
// Build the data structures.
int numTileSets = tileSetStart.size()-1;
vector<int4> groupData;
for (int tileSet = 0; tileSet < numTileSets; tileSet++) {
int indexInTileSet = 0;
for (int i = tileSetStart[tileSet]; i < tileSetStart[tileSet+1]; i++) {
int tile = tileOrder[i].second;
vector<int>& atoms1 = atomLists[tiles[tile].first];
vector<int>& atoms2 = atomLists[tiles[tile].second];
int range = indexInTileSet + ((indexInTileSet+atoms1.size())<<16);
int allFlags = (1<<atoms2.size())-1;
for (int j = 0; j < (int) atoms1.size(); j++) {
int a1 = atoms1[j];
int a2 = (j < atoms2.size() ? atoms2[j] : 0);
int flags = (exclusionFlags[tile].size() > 0 ? exclusionFlags[tile][j] : allFlags);
groupData.push_back(make_int4(a1, a2, range, flags<<indexInTileSet));
}
indexInTileSet += atoms1.size();
}
for (; indexInTileSet < 32; indexInTileSet++)
groupData.push_back(make_int4(0, 0, 0, 0));
}
interactionGroupData = CudaArray::create<int4>(cu, groupData.size(), "interactionGroupData");
interactionGroupData->upload(groupData);
// Create the kernel.
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = interactionSource;
const string suffixes[] = {"x", "y", "z", "w"};
stringstream localData;
int localDataSize = 0;
vector<CudaNonbondedUtilities::ParameterInfo>& buffers = params->getBuffers();
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getNumComponents() == 1)
localData<<buffers[i].getComponentType()<<" params"<<(i+1)<<";\n";
else {
for (int j = 0; j < buffers[i].getNumComponents(); ++j)
localData<<buffers[i].getComponentType()<<" params"<<(i+1)<<"_"<<suffixes[j]<<";\n";
}
localDataSize += buffers[i].getSize();
}
replacements["ATOM_PARAMETER_DATA"] = localData.str();
stringstream args;
for (int i = 0; i < (int) buffers.size(); i++)
args<<", const "<<buffers[i].getType()<<"* __restrict__ global_params"<<(i+1);
if (globals != NULL)
args<<", const float* __restrict__ globals";
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream load1;
for (int i = 0; i < (int) buffers.size(); i++)
load1<<buffers[i].getType()<<" params"<<(i+1)<<"1 = global_params"<<(i+1)<<"[atom1];\n";
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
stringstream loadLocal2;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getNumComponents() == 1)
loadLocal2<<"localData[threadIdx.x].params"<<(i+1)<<" = global_params"<<(i+1)<<"[atom2];\n";
else {
loadLocal2<<buffers[i].getType()<<" temp_params"<<(i+1)<<" = global_params"<<(i+1)<<"[atom2];\n";
for (int j = 0; j < buffers[i].getNumComponents(); ++j)
loadLocal2<<"localData[threadIdx.x].params"<<(i+1)<<"_"<<suffixes[j]<<" = temp_params"<<(i+1)<<"."<<suffixes[j]<<";\n";
}
}
replacements["LOAD_LOCAL_PARAMETERS"] = loadLocal2.str();
stringstream load2;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getNumComponents() == 1)
load2<<buffers[i].getType()<<" params"<<(i+1)<<"2 = localData[localIndex].params"<<(i+1)<<";\n";
else {
load2<<buffers[i].getType()<<" params"<<(i+1)<<"2 = make_"<<buffers[i].getType()<<"(";
for (int j = 0; j < buffers[i].getNumComponents(); ++j) {
if (j > 0)
load2<<", ";
load2<<"localData[localIndex].params"<<(i+1)<<"_"<<suffixes[j];
}
load2<<");\n";
}
}
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
map<string, string> defines;
if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
defines["USE_CUTOFF"] = "1";
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
defines["USE_PERIODIC"] = "1";
defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["TILE_SIZE"] = "32";
int numContexts = cu.getPlatformData().contexts.size();
int startIndex = cu.getContextIndex()*numTileSets/numContexts;
int endIndex = (cu.getContextIndex()+1)*numTileSets/numContexts;
defines["FIRST_TILE"] = cu.intToString(startIndex);
defines["LAST_TILE"] = cu.intToString(endIndex);
if ((localDataSize/4)%2 == 0 && !cu.getUseDoublePrecision())
defines["PARAMETER_SIZE_IS_EVEN"] = "1";
CUmodule program = cu.createModule(CudaKernelSources::vectorOps+cu.replaceStrings(CudaKernelSources::customNonbondedGroups, replacements), defines);
interactionGroupKernel = cu.getKernel(program, "computeInteractionGroups");
numGroupThreadBlocks = cu.getNonbondedUtilities().getNumForceThreadBlocks();
}
double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) { if (globals != NULL) {
bool changed = false; bool changed = false;
...@@ -2054,6 +2283,21 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in ...@@ -2054,6 +2283,21 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner()); longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner());
hasInitializedLongRangeCorrection = true; hasInitializedLongRangeCorrection = true;
} }
if (interactionGroupData != NULL) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
interactionGroupArgs.push_back(&cu.getForce().getDevicePointer());
interactionGroupArgs.push_back(&cu.getEnergyBuffer().getDevicePointer());
interactionGroupArgs.push_back(&cu.getPosq().getDevicePointer());
interactionGroupArgs.push_back(&interactionGroupData->getDevicePointer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
interactionGroupArgs.push_back(&params->getBuffers()[i].getMemory());
if (globals != NULL)
interactionGroupArgs.push_back(&globals->getDevicePointer());
}
int forceThreadBlockSize = cu.getNonbondedUtilities().getForceThreadBlockSize();
cu.executeKernel(interactionGroupKernel, &interactionGroupArgs[0], numGroupThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
double4 boxSize = cu.getPeriodicBoxSize(); double4 boxSize = cu.getPeriodicBoxSize();
return longRangeCoefficient/(boxSize.x*boxSize.y*boxSize.z); return longRangeCoefficient/(boxSize.x*boxSize.y*boxSize.z);
} }
......
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct {
real x, y, z;
real q;
real fx, fy, fz;
ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
real padding;
#endif
} AtomData;
extern "C" __global__ void computeInteractionGroups(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const int4* __restrict__ groupData
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE; // global warpIndex
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
real energy = 0.0f;
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
for (int tile = startTile; tile < endTile; tile++) {
const int4 atomData = groupData[TILE_SIZE*tile+tgx];
const int atom1 = atomData.x;
const int atom2 = atomData.y;
const int rangeStart = atomData.z&0xFFFF;
const int rangeEnd = (atomData.z>>16)&0xFFFF;
const int exclusions = atomData.w;
real4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
real3 force = make_real3(0);
real4 posq2 = posq[atom2];
localData[threadIdx.x].x = posq2.x;
localData[threadIdx.x].y = posq2.y;
localData[threadIdx.x].z = posq2.z;
localData[threadIdx.x].q = posq2.w;
LOAD_LOCAL_PARAMETERS
localData[threadIdx.x].fx = 0.0f;
localData[threadIdx.x].fy = 0.0f;
localData[threadIdx.x].fz = 0.0f;
int tj = tgx;
for (int j = rangeStart; j < rangeEnd; j++) {
bool isExcluded = (((exclusions>>tj)&1) == 0);
int localIndex = tbx+tj;
posq2 = make_real4(localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q);
real3 delta = make_real3(posq2.x-posq1.x, posq2.y-posq1.y, posq2.z-posq1.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
real dEdR = 0.0f;
real tempEnergy = 0.0f;
COMPUTE_INTERACTION
energy += tempEnergy;
delta *= dEdR;
force.x -= delta.x;
force.y -= delta.y;
force.z -= delta.z;
localData[localIndex].fx += delta.x;
localData[localIndex].fy += delta.y;
localData[localIndex].fz += delta.z;
tj = (tj == rangeEnd-1 ? rangeStart : tj+1);
}
if (exclusions != 0) {
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (force.x*0x100000000)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.y*0x100000000)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force.z*0x100000000)));
atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fx*0x100000000)));
atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fy*0x100000000)));
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
}
}
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
\ No newline at end of file
...@@ -538,6 +538,109 @@ void testLongRangeCorrection() { ...@@ -538,6 +538,109 @@ void testLongRangeCorrection() {
ASSERT_EQUAL_TOL(standardEnergy1-standardEnergy2, customEnergy1-customEnergy2, 1e-4); ASSERT_EQUAL_TOL(standardEnergy1-standardEnergy2, customEnergy1-customEnergy2, 1e-4);
} }
void testInteractionGroups() {
const int numParticles = 6;
System system;
VerletIntegrator integrator(0.01);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("v1+v2");
nonbonded->addPerParticleParameter("v");
vector<double> params(1, 0.001);
for (int i = 0; i < numParticles; i++) {
system.addParticle(1.0);
nonbonded->addParticle(params);
params[0] *= 10;
}
set<int> set1, set2, set3, set4;
set1.insert(2);
set2.insert(0);
set2.insert(1);
set2.insert(2);
set2.insert(3);
set2.insert(4);
set2.insert(5);
nonbonded->addInteractionGroup(set1, set2); // Particle 2 interacts with every other particle.
set3.insert(0);
set3.insert(1);
set4.insert(4);
set4.insert(5);
nonbonded->addInteractionGroup(set3, set4); // Particles 0 and 1 interact with 4 and 5.
nonbonded->addExclusion(1, 2); // Add an exclusion to make sure it gets skipped.
system.addForce(nonbonded);
Context context(system, integrator, platform);
vector<Vec3> positions(numParticles);
context.setPositions(positions);
State state = context.getState(State::Energy);
double expectedEnergy = 331.423; // Each digit is the number of interactions a particle particle is involved in.
ASSERT_EQUAL_TOL(expectedEnergy, state.getPotentialEnergy(), TOL);
}
void testLargeInteractionGroup() {
const int numMolecules = 300;
const int numParticles = numMolecules*2;
const double boxSize = 20.0;
// Create a large system.
System system;
for (int i = 0; i < numParticles; i++)
system.addParticle(1.0);
CustomNonbondedForce* nonbonded = new CustomNonbondedForce("4*eps*((sigma/r)^12-(sigma/r)^6)+138.935456*q/r; q=q1*q2; sigma=0.5*(sigma1+sigma2); eps=sqrt(eps1*eps2)");
nonbonded->addPerParticleParameter("q");
nonbonded->addPerParticleParameter("sigma");
nonbonded->addPerParticleParameter("eps");
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
vector<double> params(3);
for (int i = 0; i < numMolecules; i++) {
if (i < numMolecules/2) {
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.1;
nonbonded->addParticle(params);
params[0] = -1.0;
params[1] = 0.1;
nonbonded->addParticle(params);
}
else {
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.2;
nonbonded->addParticle(params);
params[0] = -1.0;
params[1] = 0.1;
nonbonded->addParticle(params);
}
positions[2*i] = Vec3(boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt), boxSize*genrand_real2(sfmt));
positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
nonbonded->addExclusion(2*i, 2*i+1);
}
nonbonded->setNonbondedMethod(CustomNonbondedForce::NoCutoff);
system.addForce(nonbonded);
// Compute the forces.
VerletIntegrator integrator(0.01);
Context context(system, integrator, platform);
context.setPositions(positions);
State state1 = context.getState(State::Forces);
// Modify the force so only one particle interacts with everything else.
set<int> set1, set2;
set1.insert(151);
for (int i = 0; i < numParticles; i++)
set2.insert(i);
nonbonded->addInteractionGroup(set1, set2);
context.reinitialize();
context.setPositions(positions);
State state2 = context.getState(State::Forces);
// The force on that one particle should be the same.
ASSERT_EQUAL_VEC(state1.getForces()[151], state2.getForces()[151], 1e-4);
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (argc > 1) if (argc > 1)
...@@ -553,6 +656,8 @@ int main(int argc, char* argv[]) { ...@@ -553,6 +656,8 @@ int main(int argc, char* argv[]) {
testParallelComputation(); testParallelComputation();
testSwitchingFunction(); testSwitchingFunction();
testLongRangeCorrection(); testLongRangeCorrection();
testInteractionGroups();
testLargeInteractionGroup();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment