"docs/UsersGuide/References.Data/vscode:/vscode.git/clone" did not exist on "7b0faf6aea4a90ab5879100c45be1635bdd4f83f"
Commit 2a465d40 authored by Peter Eastman's avatar Peter Eastman
Browse files

Finished OpenCL implementation of GBSA (not yet fully debugged)

parent 71d59925
...@@ -585,8 +585,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -585,8 +585,8 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// data.nonbondedMethod = method; // data.nonbondedMethod = method;
// gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method); // gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
string source = cl.loadSourceFromFile("coulombLennardJones.cl", defines); string source = cl.loadSourceFromFile("coulombLennardJones.cl", defines);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getCutoffDistance(), exclusionList, source); cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float2", 8, sigmaEpsilon->getDeviceBuffer())); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float2", sizeof(cl_float2), sigmaEpsilon->getDeviceBuffer()));
cutoffSquared = force.getCutoffDistance()*force.getCutoffDistance(); cutoffSquared = force.getCutoffDistance()*force.getCutoffDistance();
// Compute the Ewald self energy. // Compute the Ewald self energy.
...@@ -781,11 +781,12 @@ OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() { ...@@ -781,11 +781,12 @@ OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() {
} }
void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) { void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
params = new OpenCLArray<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams"); params = new OpenCLArray<mm_float2>(cl, cl.getPaddedNumAtoms(), "gbsaObcParams");
bornRadii = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "bornRadii"); bornRadii = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "bornRadii");
bornForce = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "bornForce");
obcChain = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "obcChain"); obcChain = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms(), "obcChain");
bornSum = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornSum");
bornForce = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*nb.getNumForceBuffers(), "bornForce");
OpenCLArray<mm_float4>& posq = cl.getPosq(); OpenCLArray<mm_float4>& posq = cl.getPosq();
int numParticles = force.getNumParticles(); int numParticles = force.getNumParticles();
vector<mm_float2> paramsVector(numParticles); vector<mm_float2> paramsVector(numParticles);
...@@ -800,14 +801,20 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB ...@@ -800,14 +801,20 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
posq.upload(); posq.upload();
params->upload(paramsVector); params->upload(paramsVector);
prefactor = 2.0*-166.02691*0.4184*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric())); prefactor = 2.0*-166.02691*0.4184*((1.0/force.getSoluteDielectric())-(1.0/force.getSolventDielectric()));
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
string source = cl.loadSourceFromFile("gbsaObc2.cl");
nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source);
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float2", sizeof(cl_float2), params->getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "float", sizeof(cl_float), bornForce->getDeviceBuffer()));;
} }
void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) { void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities(); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
if (bornSum == NULL) { if (!hasCreatedKernels) {
// These objects cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then. // These Kernels cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then.
bornSum = new OpenCLArray<cl_float>(cl, cl.getPaddedNumAtoms()*cl.getNumForceBuffers(), "bornSum"); hasCreatedKernels = true;
map<string, string> defines; map<string, string> defines;
if (nb.getForceBufferPerAtomBlock()) if (nb.getForceBufferPerAtomBlock())
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
...@@ -817,13 +824,6 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) { ...@@ -817,13 +824,6 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
cl::Program program = cl.createProgram(cl.loadSourceFromFile("gbsaObc.cl"), defines); cl::Program program = cl.createProgram(cl.loadSourceFromFile("gbsaObc.cl"), defines);
computeBornSumKernel = cl::Kernel(program, "computeBornSum"); computeBornSumKernel = cl::Kernel(program, "computeBornSum");
reduceBornSumKernel = cl::Kernel(program, "reduceBornSum");
}
cl.clearBuffer(*bornSum);
cl.clearBuffer(*bornForce);
// Compute the Born sum.
computeBornSumKernel.setArg<cl_int>(0, cl.getNumAtoms()); computeBornSumKernel.setArg<cl_int>(0, cl.getNumAtoms());
computeBornSumKernel.setArg<cl_int>(1, cl.getPaddedNumAtoms()); computeBornSumKernel.setArg<cl_int>(1, cl.getPaddedNumAtoms());
computeBornSumKernel.setArg<cl::Buffer>(2, bornSum->getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(2, bornSum->getDeviceBuffer());
...@@ -844,10 +844,7 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) { ...@@ -844,10 +844,7 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
computeBornSumKernel.setArg<cl::Buffer>(8, nb.getTiles().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(8, nb.getTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl_uint>(9, nb.getTiles().getSize()); computeBornSumKernel.setArg<cl_uint>(9, nb.getTiles().getSize());
} }
cl.executeKernel(computeBornSumKernel, nb.getTiles().getSize()*OpenCLContext::TileSize); reduceBornSumKernel = cl::Kernel(program, "reduceBornSum");
// Reduce the Born sum.
reduceBornSumKernel.setArg<cl_int>(0, cl.getNumAtoms()); reduceBornSumKernel.setArg<cl_int>(0, cl.getNumAtoms());
reduceBornSumKernel.setArg<cl_int>(1, cl.getPaddedNumAtoms()); reduceBornSumKernel.setArg<cl_int>(1, cl.getPaddedNumAtoms());
reduceBornSumKernel.setArg<cl_int>(2, cl.getNumForceBuffers()); reduceBornSumKernel.setArg<cl_int>(2, cl.getNumForceBuffers());
...@@ -858,7 +855,47 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) { ...@@ -858,7 +855,47 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
reduceBornSumKernel.setArg<cl::Buffer>(7, params->getDeviceBuffer()); reduceBornSumKernel.setArg<cl::Buffer>(7, params->getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(8, bornRadii->getDeviceBuffer()); reduceBornSumKernel.setArg<cl::Buffer>(8, bornRadii->getDeviceBuffer());
reduceBornSumKernel.setArg<cl::Buffer>(9, obcChain->getDeviceBuffer()); reduceBornSumKernel.setArg<cl::Buffer>(9, obcChain->getDeviceBuffer());
force1Kernel = cl::Kernel(program, "computeGBSAForce1");
force1Kernel.setArg<cl_int>(0, cl.getNumAtoms());
force1Kernel.setArg<cl_int>(1, cl.getPaddedNumAtoms());
force1Kernel.setArg<cl_float>(2, prefactor);
force1Kernel.setArg<cl::Buffer>(3, cl.getForceBuffers().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(4, cl.getEnergyBuffer().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(5, cl.getPosq().getDeviceBuffer());
force1Kernel.setArg(6, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
force1Kernel.setArg(7, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
force1Kernel.setArg<cl::Buffer>(8, bornRadii->getDeviceBuffer());
force1Kernel.setArg(9, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
force1Kernel.setArg<cl::Buffer>(10, bornForce->getDeviceBuffer());
force1Kernel.setArg(11, OpenCLContext::ThreadBlockSize*sizeof(cl_float), NULL);
if (nb.getUseCutoff()) {
force1Kernel.setArg<cl::Buffer>(12, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl_float>(13, nb.getCutoffDistance()*nb.getCutoffDistance());
force1Kernel.setArg<mm_float4>(14, nb.getPeriodicBoxSize());
force1Kernel.setArg<cl::Buffer>(15, nb.getInteractionFlags().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(16, nb.getInteractionCount().getDeviceBuffer());
force1Kernel.setArg(17, OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
}
else {
force1Kernel.setArg<cl::Buffer>(12, nb.getTiles().getDeviceBuffer());
force1Kernel.setArg<cl_uint>(13, nb.getTiles().getSize());
}
reduceBornForceKernel = cl::Kernel(program, "reduceBornForce");
reduceBornForceKernel.setArg<cl_int>(0, cl.getNumAtoms());
reduceBornForceKernel.setArg<cl_int>(1, cl.getPaddedNumAtoms());
reduceBornForceKernel.setArg<cl_int>(2, cl.getNumForceBuffers());
reduceBornForceKernel.setArg<cl::Buffer>(3, bornForce->getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(4, cl.getEnergyBuffer().getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(5, params->getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(6, bornRadii->getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(7, obcChain->getDeviceBuffer());
}
cl.clearBuffer(*bornSum);
cl.clearBuffer(*bornForce);
cl.executeKernel(computeBornSumKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(reduceBornSumKernel, cl.getPaddedNumAtoms()); cl.executeKernel(reduceBornSumKernel, cl.getPaddedNumAtoms());
cl.executeKernel(force1Kernel, cl.getPaddedNumAtoms());
cl.executeKernel(reduceBornForceKernel, cl.getPaddedNumAtoms());
} }
double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) { double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) {
......
...@@ -374,7 +374,7 @@ private: ...@@ -374,7 +374,7 @@ private:
*/ */
class OpenCLCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel { class OpenCLCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
public: public:
OpenCLCalcGBSAOBCForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGBSAOBCForceKernel(name, platform), cl(cl), bornSum(NULL) { OpenCLCalcGBSAOBCForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGBSAOBCForceKernel(name, platform), cl(cl), hasCreatedKernels(false) {
} }
~OpenCLCalcGBSAOBCForceKernel(); ~OpenCLCalcGBSAOBCForceKernel();
/** /**
...@@ -399,6 +399,7 @@ public: ...@@ -399,6 +399,7 @@ public:
double executeEnergy(ContextImpl& context); double executeEnergy(ContextImpl& context);
private: private:
double prefactor; double prefactor;
bool hasCreatedKernels;
OpenCLContext& cl; OpenCLContext& cl;
OpenCLArray<mm_float2>* params; OpenCLArray<mm_float2>* params;
OpenCLArray<cl_float>* bornSum; OpenCLArray<cl_float>* bornSum;
...@@ -407,6 +408,8 @@ private: ...@@ -407,6 +408,8 @@ private:
OpenCLArray<cl_float>* obcChain; OpenCLArray<cl_float>* obcChain;
cl::Kernel computeBornSumKernel; cl::Kernel computeBornSumKernel;
cl::Kernel reduceBornSumKernel; cl::Kernel reduceBornSumKernel;
cl::Kernel force1Kernel;
cl::Kernel reduceBornForceKernel;
}; };
/** /**
......
...@@ -68,7 +68,7 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() { ...@@ -68,7 +68,7 @@ OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
delete compact; delete compact;
} }
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel) { void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const vector<vector<int> >& exclusionList, const string& kernel) {
if (cutoff != -1.0) { if (cutoff != -1.0) {
if (usesCutoff != useCutoff) if (usesCutoff != useCutoff)
throw OpenMMException("All Forces must agree on whether to use a cutoff"); throw OpenMMException("All Forces must agree on whether to use a cutoff");
...@@ -76,6 +76,8 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic ...@@ -76,6 +76,8 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic
throw OpenMMException("All Forces must agree on whether to use periodic boundary conditions"); throw OpenMMException("All Forces must agree on whether to use periodic boundary conditions");
if (cutoffDistance != cutoff) if (cutoffDistance != cutoff)
throw OpenMMException("All Forces must use the same cutoff distance"); throw OpenMMException("All Forces must use the same cutoff distance");
}
if (usesExclusions && atomExclusions.size() != 0) {
bool sameExclusions = (exclusionList.size() == atomExclusions.size()); bool sameExclusions = (exclusionList.size() == atomExclusions.size());
for (int i = 0; i < exclusionList.size() && sameExclusions; i++) { for (int i = 0; i < exclusionList.size() && sameExclusions; i++) {
if (exclusionList[i].size() != atomExclusions[i].size()) if (exclusionList[i].size() != atomExclusions[i].size())
...@@ -91,8 +93,9 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic ...@@ -91,8 +93,9 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic
useCutoff = usesCutoff; useCutoff = usesCutoff;
usePeriodic = usesPeriodic; usePeriodic = usesPeriodic;
cutoff = cutoffDistance; cutoff = cutoffDistance;
atomExclusions = exclusionList;
kernelSource += kernel+"\n"; kernelSource += kernel+"\n";
if (usesExclusions)
atomExclusions = exclusionList;
} }
} }
...@@ -115,21 +118,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -115,21 +118,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
for (unsigned int x = y; x < numAtomBlocks; x++) for (unsigned int x = y; x < numAtomBlocks; x++)
tileVec[count++] = (x << 17) | (y << 2); tileVec[count++] = (x << 17) | (y << 2);
// Create kernels.
forceKernel = createInteractionKernel(kernelSource, parameters, true);
map<string, string> defines;
if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (useCutoff)
defines["USE_CUTOFF"] = "1";
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
cl::Program interactingBlocksProgram = context.createProgram(context.loadSourceFromFile("findInteractingBlocks.cl"), defines);
findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks");
// Mark which tiles have exclusions. // Mark which tiles have exclusions.
for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) { for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) {
...@@ -223,21 +211,24 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -223,21 +211,24 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
blockBoundingBox = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockBoundingBox"); blockBoundingBox = new OpenCLArray<mm_float4>(context, numAtomBlocks, "blockBoundingBox");
compact = new OpenCLCompact(context); compact = new OpenCLCompact(context);
} }
}
void OpenCLNonbondedUtilities::prepareInteractions() {
if (!useCutoff)
return;
// Compute the neighbor list. // Create kernels.
forceKernel = createInteractionKernel(kernelSource, parameters, true);
if (useCutoff) {
map<string, string> defines;
if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
cl::Program interactingBlocksProgram = context.createProgram(context.loadSourceFromFile("findInteractingBlocks.cl"), defines);
findBlockBoundsKernel = cl::Kernel(interactingBlocksProgram, "findBlockBounds");
findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms()); findBlockBoundsKernel.setArg<cl_int>(0, context.getNumAtoms());
findBlockBoundsKernel.setArg<mm_float4>(1, periodicBoxSize); findBlockBoundsKernel.setArg<mm_float4>(1, periodicBoxSize);
findBlockBoundsKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer()); findBlockBoundsKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(3, blockCenter->getDeviceBuffer()); findBlockBoundsKernel.setArg<cl::Buffer>(3, blockCenter->getDeviceBuffer());
findBlockBoundsKernel.setArg<cl::Buffer>(4, blockBoundingBox->getDeviceBuffer()); findBlockBoundsKernel.setArg<cl::Buffer>(4, blockBoundingBox->getDeviceBuffer());
context.executeKernel(findBlockBoundsKernel, context.getNumAtoms()); findInteractingBlocksKernel = cl::Kernel(interactingBlocksProgram, "findBlocksWithInteractions");
findInteractingBlocksKernel.setArg<cl_int>(0, tiles->getSize()); findInteractingBlocksKernel.setArg<cl_int>(0, tiles->getSize());
findInteractingBlocksKernel.setArg<cl_float>(1, cutoff*cutoff); findInteractingBlocksKernel.setArg<cl_float>(1, cutoff*cutoff);
findInteractingBlocksKernel.setArg<mm_float4>(2, periodicBoxSize); findInteractingBlocksKernel.setArg<mm_float4>(2, periodicBoxSize);
...@@ -245,10 +236,7 @@ void OpenCLNonbondedUtilities::prepareInteractions() { ...@@ -245,10 +236,7 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
findInteractingBlocksKernel.setArg<cl::Buffer>(4, blockCenter->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(4, blockCenter->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(5, blockBoundingBox->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(5, blockBoundingBox->getDeviceBuffer());
findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactionFlags->getDeviceBuffer()); findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactionFlags->getDeviceBuffer());
context.executeKernel(findInteractingBlocksKernel, context.getNumAtoms()); findInteractionsWithinBlocksKernel = cl::Kernel(interactingBlocksProgram, "findInteractionsWithinBlocks");
compact->compactStream(*interactingTiles, *tiles, *interactionFlags, *interactionCount);
findInteractionsWithinBlocksKernel.setArg<cl_float>(0, cutoff*cutoff); findInteractionsWithinBlocksKernel.setArg<cl_float>(0, cutoff*cutoff);
findInteractionsWithinBlocksKernel.setArg<mm_float4>(1, periodicBoxSize); findInteractionsWithinBlocksKernel.setArg<mm_float4>(1, periodicBoxSize);
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer()); findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
...@@ -258,11 +246,23 @@ void OpenCLNonbondedUtilities::prepareInteractions() { ...@@ -258,11 +246,23 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(6, interactionFlags->getDeviceBuffer()); findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(6, interactionFlags->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(7, interactionCount->getDeviceBuffer()); findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(7, interactionCount->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg(8, OpenCLContext::ThreadBlockSize*sizeof(cl_uint), NULL); findInteractionsWithinBlocksKernel.setArg(8, OpenCLContext::ThreadBlockSize*sizeof(cl_uint), NULL);
}
}
void OpenCLNonbondedUtilities::prepareInteractions() {
if (!useCutoff)
return;
// Compute the neighbor list.
context.executeKernel(findBlockBoundsKernel, context.getNumAtoms());
context.executeKernel(findInteractingBlocksKernel, context.getNumAtoms());
compact->compactStream(*interactingTiles, *tiles, *interactionFlags, *interactionCount);
context.executeKernel(findInteractionsWithinBlocksKernel, context.getNumAtoms()); context.executeKernel(findInteractionsWithinBlocksKernel, context.getNumAtoms());
} }
void OpenCLNonbondedUtilities::computeInteractions() { void OpenCLNonbondedUtilities::computeInteractions() {
invokeInteractionKernel(forceKernel, parameters); context.executeKernel(forceKernel, tiles->getSize()*OpenCLContext::TileSize);
} }
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo> params, bool useExclusions) const { cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo> params, bool useExclusions) const {
...@@ -280,24 +280,22 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -280,24 +280,22 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
args << params[i].getName(); args << params[i].getName();
} }
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
// local_sigmaEpsilon[get_local_id(0)] = sigmaEpsilon1;
stringstream loadLocal1; stringstream loadLocal1;
for (int i = 0; i < params.size(); i++) { for (int i = 0; i < params.size(); i++) {
loadLocal1 << "local_"; loadLocal1 << "local_";
loadLocal1 << params[i].getName(); loadLocal1 << params[i].getName();
loadLocal1 << "[get_local_id(0)] = "; loadLocal1 << "[get_local_id(0)] = ";
loadLocal1 << params[i].getName(); loadLocal1 << params[i].getName();
loadLocal1 << "1;"; loadLocal1 << "1;\n";
} }
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
// local_sigmaEpsilon[get_local_id(0)] = global_sigmaEpsilon[j];
stringstream loadLocal2; stringstream loadLocal2;
for (int i = 0; i < params.size(); i++) { for (int i = 0; i < params.size(); i++) {
loadLocal2 << "local_"; loadLocal2 << "local_";
loadLocal2 << params[i].getName(); loadLocal2 << params[i].getName();
loadLocal2 << "[get_local_id(0)] = global_"; loadLocal2 << "[get_local_id(0)] = global_";
loadLocal2 << params[i].getName(); loadLocal2 << params[i].getName();
loadLocal2 << "[j];"; loadLocal2 << "[j];\n";
} }
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
stringstream load1; stringstream load1;
...@@ -307,7 +305,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -307,7 +305,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
load1 << params[i].getName(); load1 << params[i].getName();
load1 << "1 = global_"; load1 << "1 = global_";
load1 << params[i].getName(); load1 << params[i].getName();
load1 << "[i];"; load1 << "[atom1];\n";
} }
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str(); replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
stringstream load2j; stringstream load2j;
...@@ -317,19 +315,9 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -317,19 +315,9 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
load2j << params[i].getName(); load2j << params[i].getName();
load2j << "2 = local_"; load2j << "2 = local_";
load2j << params[i].getName(); load2j << params[i].getName();
load2j << "[tbx+j];"; load2j << "[atom2];\n";
} }
replacements["LOAD_ATOM2_PARAMETERS_J"] = load2j.str(); replacements["LOAD_ATOM2_PARAMETERS"] = load2j.str();
stringstream load2tj;
for (int i = 0; i < params.size(); i++) {
load2tj << params[i].getType();
load2tj << " ";
load2tj << params[i].getName();
load2tj << "2 = local_";
load2tj << params[i].getName();
load2tj << "[tbx+tj];";
}
replacements["LOAD_ATOM2_PARAMETERS_TJ"] = load2tj.str();
map<string, string> defines; map<string, string> defines;
if (forceBufferPerAtomBlock) if (forceBufferPerAtomBlock)
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
...@@ -340,35 +328,36 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -340,35 +328,36 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
if (useExclusions) if (useExclusions)
defines["USE_EXCLUSIONS"] = "1"; defines["USE_EXCLUSIONS"] = "1";
cl::Program program = context.createProgram(context.loadSourceFromFile("nonbonded.cl", replacements), defines); cl::Program program = context.createProgram(context.loadSourceFromFile("nonbonded.cl", replacements), defines);
return cl::Kernel(program, "computeNonbonded"); cl::Kernel kernel(program, "computeNonbonded");
}
// Set arguments to the Kernel.
void OpenCLNonbondedUtilities::invokeInteractionKernel(cl::Kernel kernel, const vector<ParameterInfo>& params) { kernel.setArg<cl_int>(0, context.getNumAtoms());
kernel.setArg<cl_int>(0, context.getPaddedNumAtoms()); kernel.setArg<cl_int>(1, context.getPaddedNumAtoms());
kernel.setArg<cl::Buffer>(1, context.getForceBuffers().getDeviceBuffer()); kernel.setArg<cl::Buffer>(2, context.getForceBuffers().getDeviceBuffer());
kernel.setArg<cl::Buffer>(2, context.getEnergyBuffer().getDeviceBuffer()); kernel.setArg<cl::Buffer>(3, context.getEnergyBuffer().getDeviceBuffer());
kernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer()); kernel.setArg<cl::Buffer>(4, context.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(4, exclusions->getDeviceBuffer()); kernel.setArg<cl::Buffer>(5, exclusions->getDeviceBuffer());
kernel.setArg<cl::Buffer>(5, exclusionIndex->getDeviceBuffer()); kernel.setArg<cl::Buffer>(6, exclusionIndex->getDeviceBuffer());
kernel.setArg(6, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
kernel.setArg(7, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL); kernel.setArg(7, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
int paramBase = 10; kernel.setArg(8, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
int paramBase = 11;
if (useCutoff) { if (useCutoff) {
paramBase = 14; paramBase = 15;
kernel.setArg<cl::Buffer>(8, interactingTiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(9, interactingTiles->getDeviceBuffer());
kernel.setArg<cl_float>(9, cutoff*cutoff); kernel.setArg<cl_float>(10, cutoff*cutoff);
kernel.setArg<mm_float4>(10, periodicBoxSize); kernel.setArg<mm_float4>(11, periodicBoxSize);
kernel.setArg<cl::Buffer>(11, interactionFlags->getDeviceBuffer()); kernel.setArg<cl::Buffer>(12, interactionFlags->getDeviceBuffer());
kernel.setArg<cl::Buffer>(12, interactionCount->getDeviceBuffer()); kernel.setArg<cl::Buffer>(13, interactionCount->getDeviceBuffer());
kernel.setArg(13, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL); kernel.setArg(14, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
} }
else { else {
kernel.setArg<cl::Buffer>(8, tiles->getDeviceBuffer()); kernel.setArg<cl::Buffer>(9, tiles->getDeviceBuffer());
kernel.setArg<cl_uint>(9, tiles->getSize()); kernel.setArg<cl_uint>(10, tiles->getSize());
} }
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
kernel.setArg<cl::Buffer>(i*2+paramBase, params[i].getBuffer()); kernel.setArg<cl::Buffer>(i*2+paramBase, params[i].getBuffer());
kernel.setArg(i*2+paramBase+1, OpenCLContext::ThreadBlockSize*params[i].getSize(), NULL); kernel.setArg(i*2+paramBase+1, OpenCLContext::ThreadBlockSize*params[i].getSize(), NULL);
} }
context.executeKernel(kernel, tiles->getSize()*OpenCLContext::TileSize); return kernel;
} }
...@@ -38,7 +38,7 @@ class OpenCLCompact; ...@@ -38,7 +38,7 @@ class OpenCLCompact;
/** /**
* This class provides a generic interface for calculating nonbonded interactions. It does this in two * This class provides a generic interface for calculating nonbonded interactions. It does this in two
* ways. First, it can be used to create and invoke Kernels that evaluate nonbonded interactions. Clients * ways. First, it can be used to create Kernels that evaluate nonbonded interactions. Clients
* only need to provide the code for evaluating a single interaction and the list of parameters it depends on. * only need to provide the code for evaluating a single interaction and the list of parameters it depends on.
* A complete kernel is then synthesized using an appropriate algorithm to evaluate all interactions on all * A complete kernel is then synthesized using an appropriate algorithm to evaluate all interactions on all
* atoms. * atoms.
...@@ -71,11 +71,12 @@ public: ...@@ -71,11 +71,12 @@ public:
* *
* @param usesCutoff specifies whether a cutoff should be applied to this interaction * @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction * @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
* @param usesExclusions specifies whether this interaction uses exclusions. If this is true, it must have identical exclusions to every other interaction.
* @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false) * @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false)
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded * @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
* @param kernel the code to evaluate the interaction * @param kernel the code to evaluate the interaction
*/ */
void addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel); void addInteraction(bool usesCutoff, bool usesPeriodic, bool usesExclusions, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList, const std::string& kernel);
/** /**
* Add a per-atom parameter that the default interaction kernel may depend on. * Add a per-atom parameter that the default interaction kernel may depend on.
*/ */
...@@ -175,13 +176,6 @@ public: ...@@ -175,13 +176,6 @@ public:
* @param useExclusions specifies whether exclusions are applied to this interaction * @param useExclusions specifies whether exclusions are applied to this interaction
*/ */
cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo> params, bool useExclusions) const; cl::Kernel createInteractionKernel(const std::string& source, const std::vector<ParameterInfo> params, bool useExclusions) const;
/**
* Invoke a Kernel that was created by createInteractionKernel.
*
* @param kernel the Kernel to invoke
* @param params the per-atom parameters to pass to the Kernel
*/
void invokeInteractionKernel(cl::Kernel kernel, const std::vector<ParameterInfo>& params);
private: private:
OpenCLContext& context; OpenCLContext& context;
cl::Kernel forceKernel; cl::Kernel forceKernel;
......
{ #ifdef USE_CUTOFF
if (!isExcluded && r2 < cutoffSquared) {
#else
if (!isExcluded) {
#endif
float sig = sigmaEpsilon1.x + sigmaEpsilon2.x; float sig = sigmaEpsilon1.x + sigmaEpsilon2.x;
float sig2 = invR*sig; float sig2 = invR*sig;
sig2 *= sig2; sig2 *= sig2;
......
const unsigned int TileSize = 32; const unsigned int TileSize = 32;
const float dielectricOffset = 0.009f; const float dielectricOffset = 0.009f;
const float probeRadius = 0.14f;
const float surfaceAreaFactor = -6.0f*3.14159265358979323846f*0.0216f*1000.0f*0.4184f;
/** /**
* Compute the Born sum. * Compute the Born sum.
...@@ -30,10 +32,10 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g ...@@ -30,10 +32,10 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
unsigned int tgx = get_local_id(0) & (TileSize-1); unsigned int tgx = get_local_id(0) & (TileSize-1);
unsigned int tbx = get_local_id(0) - tgx; unsigned int tbx = get_local_id(0) - tgx;
unsigned int tj = tgx; unsigned int tj = tgx;
unsigned int i = x + tgx; unsigned int atom1 = x + tgx;
float bornSum = 0.0f; float bornSum = 0.0f;
float4 posq1 = posq[i]; float4 posq1 = posq[atom1];
float2 params1 = global_params[i]; float2 params1 = global_params[atom1];
if (x == y) { if (x == y) {
// This tile is on the diagonal. // This tile is on the diagonal.
...@@ -50,9 +52,9 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g ...@@ -50,9 +52,9 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (i < numAtoms && x+j < numAtoms && r2 < cutoffSquared) { if (atom1 < numAtoms && y+j < numAtoms && r2 < cutoffSquared) {
#else #else
if (i < numAtoms && x+j < numAtoms) { if (atom1 < numAtoms && y+j < numAtoms) {
#endif #endif
float r = sqrt(r2); float r = sqrt(r2);
float invR = 1.0f/r; float invR = 1.0f/r;
...@@ -108,9 +110,9 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g ...@@ -108,9 +110,9 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (i < numAtoms && x+j < numAtoms && r2 < cutoffSquared) { if (atom1 < numAtoms && y+j < numAtoms && r2 < cutoffSquared) {
#else #else
if (i < numAtoms && x+j < numAtoms) { if (atom1 < numAtoms && y+j < numAtoms) {
#endif #endif
float r = sqrt(r2); float r = sqrt(r2);
float invR = 1.0f/r; float invR = 1.0f/r;
...@@ -175,9 +177,9 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g ...@@ -175,9 +177,9 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
#endif #endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (i < numAtoms && x+tj < numAtoms && r2 < cutoffSquared) { if (atom1 < numAtoms && y+tj < numAtoms && r2 < cutoffSquared) {
#else #else
if (i < numAtoms && x+tj < numAtoms) { if (atom1 < numAtoms && y+tj < numAtoms) {
#endif #endif
float r = sqrt(r2); float r = sqrt(r2);
float invR = 1.0f/r; float invR = 1.0f/r;
...@@ -261,3 +263,255 @@ __kernel void reduceBornSum(int numAtoms, int bufferSize, int numBuffers, float ...@@ -261,3 +263,255 @@ __kernel void reduceBornSum(int numAtoms, int bufferSize, int numBuffers, float
index += get_global_size(0); index += get_global_size(0);
} }
} }
/**
* First part of computing the GBSA interaction.
*/
__kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefactor, __global float4* forceBuffers, __global float* energyBuffer,
__global float4* posq, __local float4* local_posq, __local float4* local_force, __global float* global_bornRadii, __local float* local_bornRadii,
__global float* global_bornForce, __local float* local_bornForce, __global unsigned int* tiles,
#ifdef USE_CUTOFF
float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local float4* tempBuffer) {
#else
unsigned int numTiles) {
#endif
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
#endif
unsigned int totalWarps = get_global_size(0)/TileSize;
unsigned int warp = get_global_id(0)/TileSize;
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
while (pos < end) {
// Extract the coordinates of this tile
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff)*TileSize;
x = (x>>17)*TileSize;
unsigned int tgx = get_local_id(0) & (TileSize-1);
unsigned int tbx = get_local_id(0) - tgx;
unsigned int tj = tgx;
unsigned int atom1 = x + tgx;
float4 force = 0.0f;
float4 posq1 = posq[atom1];
float bornRadius1 = global_bornRadii[atom1];
if (x == y) {
// This tile is on the diagonal.
local_posq[get_local_id(0)] = posq1;
local_bornRadii[get_local_id(0)] = bornRadius1;
unsigned int xi = x/TileSize;
unsigned int tile = xi+xi*paddedNumAtoms/TileSize-xi*(xi+1)/2;
for (unsigned int j = 0; j < TileSize; j++) {
if (atom1 < numAtoms && y+j < numAtoms) {
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f/r;
float4 posq2 = local_posq[tbx+j];
float bornRadius2 = local_bornRadii[tbx+j];
float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2);
float tempEnergy = (prefactor*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
force.w += dGpol_dalpha2_ij*bornRadius2;
float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
if (r2 > cutoffSquared) {
dEdR = 0.0f;
tempEnergy = 0.0f;
}
#endif
energy += 0.5f*tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms;
#else
unsigned int offset = x + tgx + warp*paddedNumAtoms;
#endif
forceBuffers[offset].xyz += force.xyz;
global_bornForce[offset] = force.w;
}
else {
// This is an off-diagonal tile.
if (lasty != y) {
unsigned int j = y + tgx;
local_posq[get_local_id(0)] = posq[j];
local_bornRadii[get_local_id(0)] = global_bornRadii[j];
}
local_force[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos];
if (flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TileSize; j++) {
if ((flags&(1<<j)) != 0) {
float4 delta = (float4) (local_posq[tbx+j].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f / r;
float4 posq2 = local_posq[tbx+j];
float bornRadius2 = local_bornRadii[tbx+j];
float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2);
float tempEnergy = (prefactor*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
force.w += dGpol_dalpha2_ij*bornRadius2;
float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
if (atom1 >= numAtoms || y+j >= numAtoms || r2 > cutoffSquared) {
#else
if (atom1 >= numAtoms || y+j >= numAtoms) {
#endif
dEdR = 0.0f;
tempEnergy = 0.0f;
}
energy += tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)] = (float4) (delta.xyz, dGpol_dalpha2_ij*bornRadius2);
// Sum the forces on atom j.
if (tgx % 2 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
if (tgx % 4 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
if (tgx % 8 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
if (tgx % 16 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
if (tgx == 0)
local_force[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned int xi = x/TileSize;
unsigned int yi = y/TileSize;
unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2;
for (unsigned int j = 0; j < TileSize; j++) {
if (atom1 < numAtoms && y+tj < numAtoms) {
float4 delta = (float4) (local_posq[tbx+tj].xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
float invR = 1.0f / r;
float4 posq2 = local_posq[tbx+tj];
float bornRadius2 = local_bornRadii[tbx+tj];
float alpha2_ij = bornRadius1*bornRadius2;
float D_ij = r2/(4.0f*alpha2_ij);
float expTerm = exp(-D_ij);
float denominator2 = r2 + alpha2_ij*expTerm;
float denominator = sqrt(denominator2);
float tempEnergy = (prefactor*posq1.w*posq2.w)/denominator;
float Gpol = tempEnergy/denominator2;
float dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f+D_ij);
force.w += dGpol_dalpha2_ij*bornRadius2;
float dEdR = Gpol*(1.0f - 0.25f*expTerm);
#ifdef USE_CUTOFF
if (r2 > cutoffSquared) {
dEdR = 0.0f;
tempEnergy = 0.0f;
}
#endif
energy += tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
local_force[tbx+tj].xyz += delta.xyz;
}
tj = (tj + 1) & (TileSize - 1);
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset1 = x + tgx + (y/TileSize)*paddedNumAtoms;
unsigned int offset2 = y + tgx + (x/TileSize)*paddedNumAtoms;
#else
unsigned int offset1 = x + tgx + warp*paddedNumAtoms;
unsigned int offset2 = y + tgx + warp*paddedNumAtoms;
#endif
forceBuffers[offset1].xyz += force.xyz;
forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz;
global_bornForce[offset1] = force.w;
global_bornForce[offset2] = local_force[get_local_id(0)].w;
lasty = y;
}
pos++;
}
energyBuffer[get_global_id(0)] += energy;
}
/**
* Reduce the Born force.
*/
__kernel void reduceBornForce(int numAtoms, int bufferSize, int numBuffers, __global float* bornForce, __global float* energyBuffer,
__global float2* params, __global float* bornRadii, __global float* obcChain) {
float energy = 0.0f;
unsigned int index = get_global_id(0);
while (index < numAtoms) {
// Sum the Born force
int totalSize = bufferSize*numBuffers;
float force = bornForce[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
force += bornForce[i];
// Now calculate the actual force
float offsetRadius = params[index].x;
float bornRadius = bornRadii[index];
float r = offsetRadius+dielectricOffset+probeRadius;
float ratio6 = pow((offsetRadius+dielectricOffset)/bornRadius, 6.0f);
float saTerm = surfaceAreaFactor*r*r*ratio6;
force += saTerm/bornRadius;
energy += saTerm;
force *= bornRadius*bornRadius*obcChain[index];
bornForce[index] = force;
index += get_global_size(0);
}
energyBuffer[get_global_id(0)] += energy/-6.0f;
}
#ifdef USE_CUTOFF
if (atom1 < numAtoms && atom2 < numAtoms && atom1 != atom2 && r2 < cutoffSquared) {
#else
if (atom1 < numAtoms && atom2 < numAtoms && atom1 != atom2) {
#endif
float invRSquared = 1.0f/r2;
float rScaledRadiusJ = r+obcParams2.y;
float rScaledRadiusI = r+obcParams1.y;
float l_ijJ = 1.0f/max(obcParams1.x, fabs(r-obcParams2.y));
float l_ijI = 1.0f/max(obcParams2.x, fabs(r-obcParams1.y));
float u_ijJ = 1.0f/rScaledRadiusJ;
float u_ijI = 1.0f/rScaledRadiusI;
float l_ij2J = l_ijJ*l_ijJ;
float l_ij2I = l_ijI*l_ijI;
float u_ij2J = u_ijJ*u_ijJ;
float u_ij2I = u_ijI*u_ijI;
float t1J = log(u_ijJ/l_ijJ);
float t1I = log(u_ijI/l_ijI);
float t2J = (l_ij2J-u_ij2J);
float t2I = (l_ij2I-u_ij2I);
float t3J = t2J*invR;
float t3I = t2I*invR;
t1J *= invR;
t1I *= invR;
if (obcParams1.x < rScaledRadiusJ) {
float term = 0.125f*(1.0f+obcParams2.y*obcParams2.y*invRSquared)*t3J + 0.25f*t1J*invRSquared;
dEdR += bornForce1*term;
}
if (obcParams2.x < rScaledRadiusJ) {
float term = 0.125f*(1.0f+obcParams1.y*obcParams1.y*invRSquared)*t3I + 0.25f*t1I*invRSquared;
dEdR += bornForce2*term;
}
}
...@@ -4,7 +4,7 @@ const unsigned int TileSize = 32; ...@@ -4,7 +4,7 @@ const unsigned int TileSize = 32;
* Compute nonbonded interactions. * Compute nonbonded interactions.
*/ */
__kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4* forceBuffers, __global float* energyBuffer, __global float4* posq,
__global unsigned int* exclusions, __global unsigned int* exclusionIndices, __local float4* local_posq, __local float4* local_force, __global unsigned int* tiles, __global unsigned int* exclusions, __global unsigned int* exclusionIndices, __local float4* local_posq, __local float4* local_force, __global unsigned int* tiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local float4* tempBuffer float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local float4* tempBuffer
...@@ -31,9 +31,9 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -31,9 +31,9 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
unsigned int tgx = get_local_id(0) & (TileSize-1); unsigned int tgx = get_local_id(0) & (TileSize-1);
unsigned int tbx = get_local_id(0) - tgx; unsigned int tbx = get_local_id(0) - tgx;
unsigned int tj = tgx; unsigned int tj = tgx;
unsigned int i = x + tgx; unsigned int atom1 = x + tgx;
float4 force = 0.0f; float4 force = 0.0f;
float4 posq1 = posq[i]; float4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS LOAD_ATOM1_PARAMETERS
if (x == y) { if (x == y) {
// This tile is on the diagonal. // This tile is on the diagonal.
...@@ -58,40 +58,26 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -58,40 +58,26 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float r = sqrt(r2);
float invR = 1.0f / r; float invR = 1.0f / r;
float4 posq2 = local_posq[tbx+j]; int atom2 = tbx+j;
LOAD_ATOM2_PARAMETERS_J float4 posq2 = local_posq[atom2];
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
#if defined USE_CUTOFF || defined USE_EXCLUSIONS
#if defined USE_CUTOFF && defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded || r2 > cutoffSquared) {
#elif defined USE_CUTOFF
if (r2 > cutoffSquared) {
#elif defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded) {
#endif
dEdR = 0.0f;
tempEnergy = 0.0f;
}
#endif
energy += 0.5f*tempEnergy; energy += 0.5f*tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
force.xyz -= delta.xyz; force.xyz -= delta.xyz;
excl >>= 1;
} }
// Write results // Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms; unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms;
#else #else
unsigned int offset = x + tgx + warp*paddedNumAtoms; unsigned int offset = x + tgx + warp*paddedNumAtoms;
#endif #endif
of = forceBuffers[offset]; forceBuffers[offset].xyz += force.xyz;
of.xyz += force.xyz;
forceBuffers[offset] = of;
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -123,23 +109,19 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -123,23 +109,19 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float r = sqrt(r2);
float invR = 1.0f / r; float invR = 1.0f / r;
float4 posq2 = local_posq[tbx+j]; int atom2 = tbx+j;
LOAD_ATOM2_PARAMETERS_J float4 posq2 = local_posq[atom2];
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
#ifdef USE_CUTOFF
if (r2 > cutoffSquared) {
dEdR = 0.0f;
tempEnergy = 0.0f;
}
#endif
energy += tempEnergy; energy += tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
force.xyz -= delta.xyz; force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)] = delta; tempBuffer[get_local_id(0)] = delta;
// Sum the forces on atom j. // Sum the forces on atom2.
if (tgx % 2 == 0) if (tgx % 2 == 0)
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+1].xyz; tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+1].xyz;
...@@ -180,35 +162,23 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -180,35 +162,23 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2); float r = sqrt(r2);
float invR = 1.0f / r; float invR = 1.0f / r;
float4 posq2 = local_posq[tbx+tj]; int atom2 = tbx+tj;
LOAD_ATOM2_PARAMETERS_TJ float4 posq2 = local_posq[atom2];
LOAD_ATOM2_PARAMETERS
atom2 = y+tj;
float dEdR = 0.0f; float dEdR = 0.0f;
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
COMPUTE_INTERACTION COMPUTE_INTERACTION
#if defined USE_CUTOFF || defined USE_EXCLUSIONS
#if defined USE_CUTOFF && defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded || r2 > cutoffSquared) {
#elif defined USE_CUTOFF
if (r2 > cutoffSquared) {
#elif defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded) {
#endif
dEdR = 0.0f;
tempEnergy = 0.0f;
}
#endif
energy += tempEnergy; energy += tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
force.xyz -= delta.xyz; force.xyz -= delta.xyz;
local_force[tbx+tj].xyz += delta.xyz; local_force[tbx+tj].xyz += delta.xyz;
excl >>= 1;
tj = (tj + 1) & (TileSize - 1); tj = (tj + 1) & (TileSize - 1);
} }
} }
// Write results // Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset1 = x + tgx + (y/TileSize)*paddedNumAtoms; unsigned int offset1 = x + tgx + (y/TileSize)*paddedNumAtoms;
unsigned int offset2 = y + tgx + (x/TileSize)*paddedNumAtoms; unsigned int offset2 = y + tgx + (x/TileSize)*paddedNumAtoms;
...@@ -216,12 +186,8 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers ...@@ -216,12 +186,8 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
unsigned int offset1 = x + tgx + warp*paddedNumAtoms; unsigned int offset1 = x + tgx + warp*paddedNumAtoms;
unsigned int offset2 = y + tgx + warp*paddedNumAtoms; unsigned int offset2 = y + tgx + warp*paddedNumAtoms;
#endif #endif
of = forceBuffers[offset1]; forceBuffers[offset1].xyz += force.xyz;
of.xyz += force.xyz; forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz;
forceBuffers[offset1] = of;
of = forceBuffers[offset2];
of.xyz += local_force[get_local_id(0)].xyz;
forceBuffers[offset2] = of;
lasty = y; lasty = y;
} }
pos++; pos++;
......
...@@ -72,7 +72,7 @@ void testSingleParticle() { ...@@ -72,7 +72,7 @@ void testSingleParticle() {
double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/gbsa->getSoluteDielectric()-1.0/gbsa->getSolventDielectric())/bornRadius; double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/gbsa->getSoluteDielectric()-1.0/gbsa->getSolventDielectric())/bornRadius;
double extendedRadius = bornRadius+0.14; // probe radius double extendedRadius = bornRadius+0.14; // probe radius
double nonpolarEnergy = CAL2JOULE*PI_M*0.0216*(10*extendedRadius)*(10*extendedRadius)*std::pow(0.15/bornRadius, 6.0); // Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp double nonpolarEnergy = CAL2JOULE*PI_M*0.0216*(10*extendedRadius)*(10*extendedRadius)*std::pow(0.15/bornRadius, 6.0); // Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp
// ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01); ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
} }
void testCutoffAndPeriodic() { void testCutoffAndPeriodic() {
...@@ -214,11 +214,11 @@ int main() { ...@@ -214,11 +214,11 @@ int main() {
try { try {
testSingleParticle(); testSingleParticle();
testCutoffAndPeriodic(); testCutoffAndPeriodic();
// for (int i = 5; i < 11; i++) { for (int i = 5; i < 11; i++) {
// testForce(i*i*i, NonbondedForce::NoCutoff, GBSAOBCForce::NoCutoff); testForce(i*i*i, NonbondedForce::NoCutoff, GBSAOBCForce::NoCutoff);
// testForce(i*i*i, NonbondedForce::CutoffNonPeriodic, GBSAOBCForce::CutoffNonPeriodic); testForce(i*i*i, NonbondedForce::CutoffNonPeriodic, GBSAOBCForce::CutoffNonPeriodic);
// testForce(i*i*i, NonbondedForce::CutoffPeriodic, GBSAOBCForce::CutoffPeriodic); testForce(i*i*i, NonbondedForce::CutoffPeriodic, GBSAOBCForce::CutoffPeriodic);
// } }
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment