Commit 3976ad91 authored by Peter Eastman's avatar Peter Eastman
Browse files

Began implementing nonbonded forces

parent 4c489434
......@@ -28,6 +28,7 @@
#include "OpenCLArray.h"
#include "OpenCLForceInfo.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include <fstream>
......@@ -61,8 +62,10 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
numAtoms = numParticles;
paddedNumAtoms = TileSize*((numParticles+TileSize-1)/TileSize);
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
numTiles = numAtomBlocks*(numAtomBlocks+1)/2;
numThreadBlocks = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0]/ThreadBlockSize;
nonbonded = new OpenCLNonbondedUtilities(*this);
posq = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "posq", true);
velm = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "velm", true);
// Create utility kernels that are used in multiple places.
......@@ -74,26 +77,25 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
OpenCLContext::~OpenCLContext() {
for (int i = 0; i < (int) forces.size(); i++)
delete forces[i];
delete posq;
delete velm;
delete force;
delete forceBuffers;
delete energyBuffer;
delete atomIndex;
delete integration;
if (posq != NULL)
delete posq;
if (velm != NULL)
delete velm;
if (force != NULL)
delete force;
if (forceBuffers != NULL)
delete forceBuffers;
if (energyBuffer != NULL)
delete energyBuffer;
if (atomIndex != NULL)
delete atomIndex;
if (integration != NULL)
delete integration;
if (nonbonded != NULL)
delete nonbonded;
}
void OpenCLContext::initialize(const System& system) {
// forceBufferPerWarp = true;
// numForceBuffers = numThreadBlocks*ThreadBlockSize/TileSize;
// if (numForceBuffers >= numAtomBlocks) {
// // For small systems, it is more efficient to have one force buffer per block of 32 atoms instead of one per warp.
//
// forceBufferPerWarp = false;
// numForceBuffers = numAtomBlocks;
// }
posq = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "posq", true);
velm = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "velm", true);
for (int i = 0; i < numAtoms; i++)
(*velm)[i].w = (float) (1.0/system.getParticleMass(i));
velm->upload();
......@@ -108,6 +110,7 @@ void OpenCLContext::initialize(const System& system) {
(*atomIndex)[i] = i;
atomIndex->upload();
integration = new OpenCLIntegrationUtilities(*this, system);
nonbonded->initialize(system);
}
void OpenCLContext::addForce(OpenCLForceInfo* force) {
......
......@@ -36,6 +36,7 @@ template <class T>
class OpenCLArray;
class OpenCLForceInfo;
class OpenCLIntegrationUtilities;
class OpenCLNonbondedUtilities;
class System;
/**
......@@ -210,12 +211,6 @@ public:
int getNumThreadBlocks() const {
return numThreadBlocks;
}
/**
* Get the total number of tiles used for nonbonded computation.
*/
int getNumTiles() const {
return numTiles;
}
/**
* Get the number of force buffers.
*/
......@@ -228,13 +223,18 @@ public:
OpenCLIntegrationUtilities& getIntegrationUtilties() {
return *integration;
}
/**
* Get the OpenCLNonbondedUtilities for this context.
*/
OpenCLNonbondedUtilities& getNonbondedUtilties() {
return *nonbonded;
}
private:
double time;
int stepCount;
int numAtoms;
int paddedNumAtoms;
int numAtomBlocks;
int numTiles;
int numThreadBlocks;
int numForceBuffers;
cl::Context context;
......@@ -251,6 +251,7 @@ private:
OpenCLArray<cl_float>* energyBuffer;
OpenCLArray<cl_int>* atomIndex;
OpenCLIntegrationUtilities* integration;
OpenCLNonbondedUtilities* nonbonded;
};
} // namespace OpenMM
......
......@@ -46,8 +46,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLCalcPeriodicTorsionForceKernel(name, platform, cl, context.getSystem());
if (name == CalcRBTorsionForceKernel::Name())
return new OpenCLCalcRBTorsionForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcNonbondedForceKernel::Name())
// return new OpenCLCalcNonbondedForceKernel(name, platform, cl, context.getSystem());
if (name == CalcNonbondedForceKernel::Name())
return new OpenCLCalcNonbondedForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name())
// return new OpenCLCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
// if (name == CalcGBSAOBCForceKernel::Name())
......
......@@ -30,6 +30,7 @@
#include "openmm/Context.h"
#include "openmm/internal/ContextImpl.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include <cmath>
using namespace OpenMM;
......@@ -46,10 +47,12 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context) {
cl.clearBuffer(cl.getForceBuffers());
cl.getNonbondedUtilties().prepareInteractions();
}
void OpenCLCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& context) {
cl.reduceBuffer(cl.getForceBuffers(), cl.getNumForceBuffers());
cl.getNonbondedUtilties().prepareInteractions();
}
void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context) {
......@@ -452,150 +455,127 @@ double OpenCLCalcRBTorsionForceKernel::executeEnergy(ContextImpl& context) {
return 0.0;
}
//OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
//}
//
//void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
// if (data.primaryKernel == NULL)
// data.primaryKernel = this;
// data.hasNonbonded = true;
// numParticles = force.getNumParticles();
// _gpuContext* gpu = data.gpu;
//
// // Identify which exceptions are 1-4 interactions.
//
// vector<pair<int, int> > exclusions;
// vector<int> exceptions;
// for (int i = 0; i < force.getNumExceptions(); i++) {
// int particle1, particle2;
// double chargeProd, sigma, epsilon;
// force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
// exclusions.push_back(pair<int, int>(particle1, particle2));
// if (chargeProd != 0.0 || epsilon != 0.0)
// exceptions.push_back(i);
OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
if (sigmaEpsilon != NULL)
delete sigmaEpsilon;
}
void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
// Identify which exceptions are 1-4 interactions.
vector<pair<int, int> > exclusions;
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double chargeProd, sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, chargeProd, sigma, epsilon);
exclusions.push_back(pair<int, int>(particle1, particle2));
if (chargeProd != 0.0 || epsilon != 0.0)
exceptions.push_back(i);
}
// Initialize nonbonded interactions.
int numParticles = force.getNumParticles();
sigmaEpsilon = new OpenCLArray<mm_float2>(cl, numParticles, "sigmaEpsilon");
OpenCLArray<mm_float4>& posq = cl.getPosq();
vector<mm_float2> sigmaEpsilonVector(numParticles);
vector<vector<int> > exclusionList(numParticles);
for (int i = 0; i < numParticles; i++) {
double charge, sigma, epsilon;
force.getParticleParameters(i, charge, sigma, epsilon);
posq[i].w = (float) charge;
sigmaEpsilonVector[i] = (mm_float2) {(float) (0.5*sigma), (float) (2.0*sqrt(epsilon))};
exclusionList[i].push_back(i);
}
for (int i = 0; i < (int) exclusions.size(); i++) {
exclusionList[exclusions[i].first].push_back(exclusions[i].second);
exclusionList[exclusions[i].second].push_back(exclusions[i].first);
}
posq.upload();
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
// if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
// method = CUTOFF;
// }
//
// // Initialize nonbonded interactions.
//
// {
// vector<int> particle(numParticles);
// vector<float> c6(numParticles);
// vector<float> c12(numParticles);
// vector<float> q(numParticles);
// vector<char> symbol;
// vector<vector<int> > exclusionList(numParticles);
// for (int i = 0; i < numParticles; i++) {
// double charge, radius, depth;
// force.getParticleParameters(i, charge, radius, depth);
// particle[i] = i;
// q[i] = (float) charge;
// c6[i] = (float) (4*depth*pow(radius, 6.0));
// c12[i] = (float) (4*depth*pow(radius, 12.0));
// exclusionList[i].push_back(i);
// }
// for (int i = 0; i < (int)exclusions.size(); i++) {
// exclusionList[exclusions[i].first].push_back(exclusions[i].second);
// exclusionList[exclusions[i].second].push_back(exclusions[i].first);
// }
// Vec3 boxVectors[3];
// system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
// gpuSetPeriodicBoxSize(gpu, (float)boxVectors[0][0], (float)boxVectors[1][1], (float)boxVectors[2][2]);
// OpenCLNonbondedMethod method = NO_CUTOFF;
// if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
// gpuSetNonbondedCutoff(gpu, (float)force.getCutoffDistance(), force.getReactionFieldDielectric());
// method = CUTOFF;
// }
// if (force.getNonbondedMethod() == NonbondedForce::CutoffPeriodic) {
// method = PERIODIC;
// }
// if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
// double ewaldErrorTol = force.getEwaldErrorTolerance();
// double alpha = (1.0/force.getCutoffDistance())*std::sqrt(-std::log(ewaldErrorTol));
// double mx = boxVectors[0][0]/force.getCutoffDistance();
// double my = boxVectors[1][1]/force.getCutoffDistance();
// double mz = boxVectors[2][2]/force.getCutoffDistance();
// double pi = 3.1415926535897932385;
// int kmaxx = (int)std::ceil(-(mx/pi)*std::log(ewaldErrorTol));
// int kmaxy = (int)std::ceil(-(my/pi)*std::log(ewaldErrorTol));
// int kmaxz = (int)std::ceil(-(mz/pi)*std::log(ewaldErrorTol));
// if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// if (kmaxx%2 == 0)
// kmaxx++;
// if (kmaxy%2 == 0)
// kmaxy++;
// if (kmaxz%2 == 0)
// kmaxz++;
// gpuSetEwaldParameters(gpu, (float) alpha, kmaxx, kmaxy, kmaxz);
// method = EWALD;
// }
// else {
// int gridSizeX = -0.5*kmaxx*std::log(ewaldErrorTol);
// int gridSizeY = -0.5*kmaxy*std::log(ewaldErrorTol);
// int gridSizeZ = -0.5*kmaxz*std::log(ewaldErrorTol);
//// printf("%d %d\n", gridSizeX, (int) (kmaxx*std::sqrt(-std::log(ewaldErrorTol))));
//// gridSizeX = 0.02*mx*std::pow(-std::log(1.5*ewaldErrorTol), 3);
//// gridSizeY = 0.02*my*std::pow(-std::log(1.5*ewaldErrorTol), 3);
//// gridSizeZ = 0.02*mz*std::pow(-std::log(1.5*ewaldErrorTol), 3);
//// double scale = 0.698*std::pow(ewaldErrorTol, -0.312);
//// double scale = 0.713*std::pow(ewaldErrorTol, -0.261);
//// printf("%f\n", scale);
// gridSizeX = mx*NonbondedForce::PMEscale;
// gridSizeY = my*NonbondedForce::PMEscale;
// gridSizeZ = mz*NonbondedForce::PMEscale;
//// printf("%d %d %d\n", gridSizeX, gridSizeY, gridSizeZ);
//// gridSizeX = mx*scale;
//// gridSizeY = my*scale;
//// gridSizeZ = mz*scale;
// gpuSetPMEParameters(gpu, (float) alpha, gridSizeX, gridSizeY, gridSizeZ);
// method = PARTICLE_MESH_EWALD;
// }
// if (force.getNonbondedMethod() == NonbondedForce::CutoffPeriodic) {
// method = PERIODIC;
// }
// if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
// double ewaldErrorTol = force.getEwaldErrorTolerance();
// double alpha = (1.0/force.getCutoffDistance())*std::sqrt(-std::log(ewaldErrorTol));
// double mx = boxVectors[0][0]/force.getCutoffDistance();
// double my = boxVectors[1][1]/force.getCutoffDistance();
// double mz = boxVectors[2][2]/force.getCutoffDistance();
// double pi = 3.1415926535897932385;
// int kmaxx = (int)std::ceil(-(mx/pi)*std::log(ewaldErrorTol));
// int kmaxy = (int)std::ceil(-(my/pi)*std::log(ewaldErrorTol));
// int kmaxz = (int)std::ceil(-(mz/pi)*std::log(ewaldErrorTol));
// if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// if (kmaxx%2 == 0)
// kmaxx++;
// if (kmaxy%2 == 0)
// kmaxy++;
// if (kmaxz%2 == 0)
// kmaxz++;
// gpuSetEwaldParameters(gpu, (float) alpha, kmaxx, kmaxy, kmaxz);
// method = EWALD;
// }
// data.nonbondedMethod = method;
// gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
//
// // Compute the Ewald self energy.
//
// data.ewaldSelfEnergy = 0.0;
// if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
// double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
// for (int i = 0; i < numParticles; i++)
// data.ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
// else {
// int gridSizeX = -0.5*kmaxx*std::log(ewaldErrorTol);
// int gridSizeY = -0.5*kmaxy*std::log(ewaldErrorTol);
// int gridSizeZ = -0.5*kmaxz*std::log(ewaldErrorTol);
// gpuSetPMEParameters(gpu, (float) alpha, gridSizeX, gridSizeY, gridSizeZ);
// method = PARTICLE_MESH_EWALD;
// }
// }
//
// // Initialize 1-4 nonbonded interactions.
//
// {
// int numExceptions = exceptions.size();
// vector<int> particle1(numExceptions);
// vector<int> particle2(numExceptions);
// vector<float> c6(numExceptions);
// vector<float> c12(numExceptions);
// vector<float> q1(numExceptions);
// vector<float> q2(numExceptions);
// for (int i = 0; i < numExceptions; i++) {
// double charge, sig, eps;
// force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps);
// c6[i] = (float) (4*eps*pow(sig, 6.0));
// c12[i] = (float) (4*eps*pow(sig, 12.0));
// q1[i] = (float) charge;
// q2[i] = 1.0f;
// }
// data.nonbondedMethod = method;
// gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
cl.getNonbondedUtilties().addInteraction(useCutoff, usePeriodic, force.getCutoffDistance(), exclusionList);
cl.getNonbondedUtilties().addParameter("sigmaEpsilon", "float2", 8, sigmaEpsilon->getDeviceBuffer());
// Compute the Ewald self energy.
ewaldSelfEnergy = 0.0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
// double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
// for (int i = 0; i < numParticles; i++)
// ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
}
// Initialize 1-4 nonbonded interactions.
{
int numExceptions = exceptions.size();
vector<int> particle1(numExceptions);
vector<int> particle2(numExceptions);
vector<float> c6(numExceptions);
vector<float> c12(numExceptions);
vector<float> q1(numExceptions);
vector<float> q2(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double charge, sig, eps;
force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps);
c6[i] = (float) (4*eps*pow(sig, 6.0));
c12[i] = (float) (4*eps*pow(sig, 12.0));
q1[i] = (float) charge;
q2[i] = 1.0f;
}
// gpuSetLJ14Parameters(gpu, 138.935485f, 1.0f, particle1, particle2, c6, c12, q1, q2);
// }
//}
//
//void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
// if (data.primaryKernel == this)
// calcForces(context, data);
//}
//
//double OpenCLCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
// if (data.primaryKernel == this)
// return calcEnergy(context, data, system);
// return 0.0;
//}
//
}
}
void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
cl.getNonbondedUtilties().computeInteractions();
}
double OpenCLCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
executeForces(context);
return ewaldSelfEnergy;
}
//OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
//}
//
......
......@@ -294,40 +294,40 @@ private:
cl::Kernel kernel;
};
///**
// * This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
// */
//class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
//public:
// OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcNonbondedForceKernel(name, platform), cl(cl), system(system) {
// }
// ~OpenCLCalcNonbondedForceKernel();
// /**
// * Initialize the kernel.
// *
// * @param system the System this kernel will be applied to
// * @param force the NonbondedForce this kernel will be used for
// */
// void initialize(const System& system, const NonbondedForce& force);
// /**
// * Execute the kernel to calculate the forces.
// *
// * @param context the context in which to execute this kernel
// */
// void executeForces(ContextImpl& context);
// /**
// * Execute the kernel to calculate the energy.
// *
// * @param context the context in which to execute this kernel
// * @return the potential energy due to the NonbondedForce
// */
// double executeEnergy(ContextImpl& context);
//private:
// OpenCLContext& cl;
// int numParticles;
// System& system;
//};
//
/**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
*/
class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcNonbondedForceKernel(name, platform), cl(cl) {
}
~OpenCLCalcNonbondedForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the NonbondedForce this kernel will be used for
*/
void initialize(const System& system, const NonbondedForce& force);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void executeForces(ContextImpl& context);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the NonbondedForce
*/
double executeEnergy(ContextImpl& context);
private:
OpenCLContext& cl;
OpenCLArray<mm_float2>* sigmaEpsilon;
double ewaldSelfEnergy;
};
///**
// * This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
// */
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLArray.h"
#include <map>
using namespace OpenMM;
using namespace std;
OpenCLNonbondedUtilities::OpenCLNonbondedUtilities(OpenCLContext& context) : context(context), cutoff(-1.0), useCutoff(false),
numForceBuffers(0), tiles(NULL), exclusionIndex(NULL), exclusions(NULL) {
}
OpenCLNonbondedUtilities::~OpenCLNonbondedUtilities() {
if (tiles != NULL)
delete tiles;
if (exclusionIndex != NULL)
delete exclusionIndex;
if (exclusions != NULL)
delete exclusions;
}
void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList) {
if (cutoff != -1.0) {
if (usesCutoff != useCutoff)
throw OpenMMException("All Forces must agree on whether to use a cutoff");
if (usesPeriodic != usePeriodic)
throw OpenMMException("All Forces must agree on whether to use periodic boundary conditions");
if (cutoffDistance != cutoff)
throw OpenMMException("All Forces must use the same cutoff distance");
bool sameExclusions = (exclusionList.size() == atomExclusions.size());
for (int i = 0; i < exclusionList.size() && sameExclusions; i++) {
if (exclusionList[i].size() != atomExclusions[i].size())
sameExclusions = false;
for (int j = 0; j < exclusionList[i].size(); j++)
if (exclusionList[i][j] != atomExclusions[i][j])
sameExclusions = false;
}
if (!sameExclusions)
throw OpenMMException("All Forces must have identical exceptions");
}
else {
useCutoff = usesCutoff;
usePeriodic = usesPeriodic;
cutoff = cutoffDistance;
atomExclusions = exclusionList;
}
}
void OpenCLNonbondedUtilities::addParameter(const string& name, const string& type, int size, cl::Buffer& buffer) {
parameters.push_back(ParameterInfo(name, type, size, buffer));
}
void OpenCLNonbondedUtilities::initialize(const System& system) {
if (cutoff == -1.0)
return; // There are no nonbonded interactions in the System.
// Create the list of tiles.
int numAtomBlocks = context.getNumAtomBlocks();
int numTiles = numAtomBlocks*(numAtomBlocks+1)/2;
tiles = new OpenCLArray<cl_uint>(context, numTiles, "tiles");
vector<cl_uint> tileVec(tiles->getSize());
unsigned int count = 0;
for (unsigned int y = 0; y < numAtomBlocks; y++)
for (unsigned int x = y; x < numAtomBlocks; x++)
tileVec[count++] = (x << 17) | (y << 2);
// Decide how many force buffers to use.
bool forceBufferPerAtomBlock = false;
numForceBuffers = context.getNumThreadBlocks()*OpenCLContext::ThreadBlockSize/OpenCLContext::TileSize;
if (numForceBuffers >= numAtomBlocks) {
// For small systems, it is more efficient to have one force buffer per block of 32 atoms instead of one per warp.
forceBufferPerAtomBlock = true;
numForceBuffers = numAtomBlocks;
}
// Create kernels.
cl::Program forceProgram = context.createProgram(context.loadSourceFromFile("nonbonded.cl"));
forceKernel = cl::Kernel(forceProgram, "computeNonbonded");
// Mark which tiles have exclusions.
for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) {
int x = atom1/OpenCLContext::TileSize;
for (int j = 0; j < (int) atomExclusions[atom1].size(); ++j) {
int atom2 = atomExclusions[atom1][j];
int y = atom2/OpenCLContext::TileSize;
int index = (x > y ? x+y*numAtomBlocks-y*(y+1)/2 : y+x*numAtomBlocks-x*(x+1)/2);
tileVec[index] |= 1;
}
}
if (context.getPaddedNumAtoms() > context.getNumAtoms()) {
int lastTile = context.getNumAtoms()/OpenCLContext::TileSize;
for (int i = 0; i < numTiles; ++i) {
int x = tileVec[i]>>17;
int y = (tileVec[i]>>2)&0x7FFF;
if (x == lastTile || y == lastTile)
tileVec[i] |= 1;
}
}
// Build a list of indices for the tiles with exclusions.
exclusionIndex = new OpenCLArray<cl_uint>(context, numTiles, "exclusionIndex");
vector<cl_uint> exclusionIndexVec(exclusionIndex->getSize());
int numWithExclusions = 0;
for (int i = 0; i < numTiles; ++i)
if ((tileVec[i]&1) == 1)
exclusionIndexVec[i] = (numWithExclusions++)*OpenCLContext::TileSize;
// Record the exclusion data.
exclusions = new OpenCLArray<cl_uint>(context, numWithExclusions*OpenCLContext::TileSize, "exclusions");
vector<cl_uint> exclusionVec(exclusions->getSize());
for (int i = 0; i < exclusions->getSize(); ++i)
exclusionVec[i] = 0xFFFFFFFF;
for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) {
int x = atom1/OpenCLContext::TileSize;
int offset1 = atom1-x*OpenCLContext::TileSize;
for (int j = 0; j < (int) atomExclusions[atom1].size(); ++j) {
int atom2 = atomExclusions[atom1][j];
int y = atom2/OpenCLContext::TileSize;
int offset2 = atom2-y*OpenCLContext::TileSize;
if (x > y) {
int tile = x+y*numAtomBlocks-y*(y+1)/2;
exclusionVec[exclusionIndexVec[tile]+offset1] &= 0xFFFFFFFF-(1<<offset2);
}
else {
int tile = y+x*numAtomBlocks-x*(x+1)/2;
exclusionVec[exclusionIndexVec[tile]+offset2] &= 0xFFFFFFFF-(1<<offset1);
}
}
}
// Mark all interactions that involve a padding atom as being excluded.
for (int atom1 = context.getNumAtoms(); atom1 < context.getPaddedNumAtoms(); ++atom1) {
int x = atom1/OpenCLContext::TileSize;
int offset1 = atom1-x*OpenCLContext::TileSize;
for (int atom2 = 0; atom2 < context.getPaddedNumAtoms(); ++atom2) {
int y = atom2/OpenCLContext::TileSize;
int offset2 = atom2-y*OpenCLContext::TileSize;
if (x >= y) {
int tile = x+y*numAtomBlocks-y*(y+1)/2;
exclusionVec[exclusionIndexVec[tile]+offset1] &= 0xFFFFFFFF-(1<<offset2);
}
if (y >= x) {
int tile = y+x*numAtomBlocks-x*(x+1)/2;
exclusionVec[exclusionIndexVec[tile]+offset2] &= 0xFFFFFFFF-(1<<offset1);
}
}
}
atomExclusions.clear(); // We won't use this again, so free the memory it used
tiles->upload(tileVec);
exclusions->upload(exclusionVec);
exclusionIndex->upload(exclusionIndexVec);
Vec3 boxVectors[3];
system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
periodicBoxSize = (mm_float4) {(float) boxVectors[0][0], (float) boxVectors[1][1], (float) boxVectors[2][2], 0.0f};
}
void OpenCLNonbondedUtilities::prepareInteractions() {
hasComputedInteractions = false;
if (!useCutoff)
return;
// TODO compute the neighbor list
}
void OpenCLNonbondedUtilities::computeInteractions() {
if (hasComputedInteractions)
return;
hasComputedInteractions = true;
forceKernel.setArg<cl_int>(0, tiles->getSize());
forceKernel.setArg<cl_int>(1, context.getPaddedNumAtoms());
forceKernel.setArg<cl_float>(2, cutoff*cutoff);
forceKernel.setArg<mm_float4>(3, periodicBoxSize);
forceKernel.setArg<cl::Buffer>(4, context.getForceBuffers().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(5, context.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(6, context.getPosq().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(7, tiles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(8, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(9, exclusionIndex->getDeviceBuffer());
forceKernel.setArg(10, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
forceKernel.setArg(11, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
for (int i = 0; i < (int) parameters.size(); i++) {
forceKernel.setArg<cl::Buffer>(i*2+12, *parameters[i].buffer);
forceKernel.setArg(i*2+13, OpenCLContext::ThreadBlockSize*parameters[i].size, NULL);
}
context.executeKernel(forceKernel, tiles->getSize()*OpenCLContext::TileSize);
}
#ifndef OPENMM_OPENCLNONBONDEDUTILITIES_H_
#define OPENMM_OPENCLNONBONDEDUTILITIES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLContext.h"
#include "openmm/System.h"
#include <string>
#include <vector>
namespace OpenMM {
/**
* This class implements features that are used by several different force. It provides
* a generic interface for calculating nonbonded interactions.
*/
class OpenCLNonbondedUtilities {
public:
OpenCLNonbondedUtilities(OpenCLContext& context);
~OpenCLNonbondedUtilities();
/**
* Add a nonbonded interaction.
*
* @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
* @param cutoffDistance the cutoff distance for this interaction (ignored if usesCutoff is false)
* @param exclusionList for each atom, specifies the list of other atoms whose interactions should be excluded
*/
void addInteraction(bool usesCutoff, bool usesPeriodic, double cutoffDistance, const std::vector<std::vector<int> >& exclusionList);
/**
* Add a per-atom parameter that interactions may depend on.
*
* @param name the name of the parameter
* @param type the data type of the parameter
* @param size the size of the parameter in bytes
* @param buffer the buffer containing the parameter values
*/
void addParameter(const std::string& name, const std::string& type, int size, cl::Buffer& buffer);
/**
* Initialize this object in preparation for a simulation.
*/
void initialize(const System& system);
/**
* Get the number of force buffers required for nonbonded forces.
*/
int getNumForceBuffers() {
return numForceBuffers;
}
/**
* Prepare to compute interactions. This updates the neighbor list.
*/
void prepareInteractions();
/**
* Compute the nonbonded interactions. This will only be executed once after each call to
* prepareInteractions(). Additional calls return immediately without doing anything.
*/
void computeInteractions();
private:
class ParameterInfo;
OpenCLContext& context;
cl::Kernel forceKernel;
OpenCLArray<cl_uint>* tiles;
OpenCLArray<cl_uint>* exclusionIndex;
OpenCLArray<cl_uint>* exclusions;
std::vector<std::vector<int> > atomExclusions;
std::vector<ParameterInfo> parameters;
double cutoff;
bool useCutoff, usePeriodic, hasComputedInteractions;
int numForceBuffers;
mm_float4 periodicBoxSize;
};
class OpenCLNonbondedUtilities::ParameterInfo {
public:
ParameterInfo(const std::string& name, const std::string& type, int size, cl::Buffer& buffer) :
name(name), type(type), size(size), buffer(&buffer) {
}
std::string name;
std::string type;
int size;
cl::Buffer* buffer;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLNONBONDEDUTILITIES_H_*/
......@@ -51,7 +51,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
registerKernelFactory(CalcPeriodicTorsionForceKernel::Name(), factory);
registerKernelFactory(CalcRBTorsionForceKernel::Name(), factory);
// registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
// registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
// registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
......
const unsigned int TileSize = 32;
const float EpsilonFactor = 138.935485f;
/**
* Compute nonbonded interactions.
*/
__kernel void computeNonbonded(int numTiles, int paddedNumAtoms, float cutoffSquared, float4 periodicBoxSize,
__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* tiles,
__global unsigned int* exclusions, __global unsigned int* exclusionIndices, __local float4* local_posq, __local float4* local_force,
__global float2* sigmaEpsilon, __local float2* local_sigmaEpsilon) {
unsigned int totalWarps = get_global_size(0)/TileSize;
unsigned int warp = get_global_id(0)/TileSize;
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
float energy = 0.0f;
#ifdef USE_CUTOFF
float3* tempBuffer = (float3*) &sA[cSim.nonbond_threads_per_block];
#endif
unsigned int lasty = 0xFFFFFFFF;
while (pos < end) {
// Extract tile coordinates from appropriate work unit
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff)*TileSize;
bool hasExclusions = (x & 0x1);
x = (x>>17)*TileSize;
float4 apos; // Local atom x, y, z, q
float4 af = 0.0f; // Local atom fx, fy, fz
unsigned int tgx = get_local_id(0) & (TileSize-1);
unsigned int tbx = get_local_id(0) - tgx;
unsigned int tj = tgx;
unsigned int i = x + tgx;
apos = posq[i];
float2 a = sigmaEpsilon[i];
if (x == y) {
// Handle diagonals uniquely at 50% efficiency
// Read fixed atom data into registers and GRF
local_posq[get_local_id(0)] = apos;
local_sigmaEpsilon[get_local_id(0)] = a;
barrier(CLK_LOCAL_MEM_FENCE);
apos.w *= EpsilonFactor;
unsigned int xi = x/TileSize;
unsigned int tile = xi+xi*paddedNumAtoms/TileSize-xi*(xi+1)/2;
unsigned int excl = exclusions[exclusionIndices[tile]+tgx];
for (unsigned int j = 0; j < TileSize; j++) {
bool isExcluded = !(excl & 0x1);
float4 delta = (float4) (local_posq[tbx+j].xyz - apos.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z;
float invR = 1.0f / sqrt(r2);
float sig = a.x + local_sigmaEpsilon[tbx+j].x;
float sig2 = invR * sig;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float eps = a.y * local_sigmaEpsilon[tbx+j].y;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
float tempEnergy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * local_posq[tbx+j].w * (invR - 2.0f * cSim.reactionFieldK * r2);
tempEnergy += apos.w * local_posq[tbx+j].w * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#else
dEdR += apos.w * local_posq[tbx+j].w * invR;
tempEnergy += apos.w * local_posq[tbx+j].w * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (isExcluded || r2 > cutoffSquared) {
#else
if (isExcluded) {
#endif
dEdR = 0.0f;
tempEnergy = 0.0f;
}
energy += 0.5f*tempEnergy;
delta.xyz *= dEdR;
af.xyz -= delta.xyz;
excl >>= 1;
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*paddedNumAtoms;
of = forceBuffers[offset];
of.xyz += af.xyz;
forceBuffers[offset] = of;
#else
of.xyz = af.xyz;
of.w = 0.0f;
unsigned int offset = x + tgx + (x/TileSize) * paddedNumAtoms;
forceBuffers[offset] = of;
#endif
}
else {
// 100% utilization
// Read fixed atom data into registers and GRF
if (lasty != y) {
unsigned int j = y + tgx;
float2 temp1 = sigmaEpsilon[j];
local_posq[get_local_id(0)] = posq[j];
local_sigmaEpsilon[get_local_id(0)] = sigmaEpsilon[j];
}
local_force[get_local_id(0)] = 0.0f;
barrier(CLK_LOCAL_MEM_FENCE);
apos.w *= EpsilonFactor;
#ifdef USE_CUTOFF
unsigned int flags = cSim.pInteractionFlag[pos];
if (!hasExclusions && flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TileSize; j++) {
if ((flags&(1<<j)) != 0) {
bool isExcluded = false;
float4 delta = (float4) (local_posq[tbx+j].xyz - apos.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z;
float invR = 1.0f / sqrt(r2);
float sig = a.x + local_sigmaEpsilon[tbx+j].x;
float sig2 = invR * sig;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float eps = a.y * local_sigmaEpsilon[tbx+j].Y;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
float tempEnergy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * local_posq[tbx+j].w * (invR - 2.0f * cSim.reactionFieldK * r2);
tempEnergy += apos.w * local_posq[tbx+j].w * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#else
dEdR += apos.w * local_posq[tbx+j].w * invR;
tempEnergy += apos.w * local_posq[tbx+j].w * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (r2 > cutoffSquared) {
dEdR = 0.0f;
tempEnergy = 0.0f;
}
#endif
energy += tempEnergy;
delta.xyz *= dEdR;
af.xyz -= delta.xyz;
tempBuffer[get_local_id(0)] = delta;
// Sum the forces on atom j.
if (tgx % 2 == 0)
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+1].xyz;
barrier(CLK_LOCAL_MEM_FENCE);
if (tgx % 4 == 0)
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+2].xyz;
barrier(CLK_LOCAL_MEM_FENCE);
if (tgx % 8 == 0)
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+4].xyz;
barrier(CLK_LOCAL_MEM_FENCE);
if (tgx % 16 == 0)
tempBuffer[get_local_id(0)].xyz += tempBuffer[get_local_id(0)+8].xyz;
barrier(CLK_LOCAL_MEM_FENCE);
if (tgx == 0)
local_force[tbx+j].xyz += tempBuffer[get_local_id(0)].xyz + tempBuffer[get_local_id(0)+16].xyz;
barrier(CLK_LOCAL_MEM_FENCE);
}
}
}
}
else // bExclusion
#endif
{
// Read fixed atom data into registers and GRF
unsigned int xi = x/TileSize;
unsigned int yi = y/TileSize;
unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2;
unsigned int excl = exclusions[exclusionIndices[tile]+tgx];
excl = (excl >> tgx) | (excl << (TileSize - tgx));
for (unsigned int j = 0; j < TileSize; j++) {
bool isExcluded = !(excl & 0x1);
float4 delta = (float4) (local_posq[tbx+tj].xyz - apos.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/periodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y/periodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z/periodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
float r2 = delta.x * delta.x + delta.y * delta.y + delta.z * delta.z;
float invR = 1.0f / sqrt(r2);
float sig = a.x + local_sigmaEpsilon[tbx+j].x;
float sig2 = invR * sig;
sig2 *= sig2;
float sig6 = sig2 * sig2 * sig2;
float eps = a.y * local_sigmaEpsilon[tbx+j].y;
float dEdR = eps * (12.0f * sig6 - 6.0f) * sig6;
float tempEnergy = eps * (sig6 - 1.0f) * sig6;
#ifdef USE_CUTOFF
dEdR += apos.w * local_posq[tbx+j].w * (invR - 2.0f * cSim.reactionFieldK * r2);
tempEnergy += apos.w * local_posq[tbx+j].w * (invR + cSim.reactionFieldK * r2 - cSim.reactionFieldC);
#else
dEdR += apos.w * local_posq[tbx+j].w * invR;
tempEnergy += apos.w * local_posq[tbx+j].w * invR;
#endif
dEdR *= invR * invR;
#ifdef USE_CUTOFF
if (isExcluded || r2 > cutoffSquared) {
#else
if (isExcluded) {
#endif
dEdR = 0.0f;
tempEnergy = 0.0f;
}
energy += tempEnergy;
delta.xyz *= dEdR;
af.xyz -= delta.xyz;
local_force[tbx+tj].xyz += delta.xyz;
barrier(CLK_LOCAL_MEM_FENCE);
excl >>= 1;
tj = (tj + 1) & (TileSize - 1);
}
}
// Write results
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = x + tgx + warp*paddedNumAtoms;
of = forceBuffers[offset];
of.xyz += af.xyz;
forceBuffers[offset] = of;
offset = y + tgx + warp*paddedNumAtoms;
of = forceBuffers[offset];
of.xyz += local_foce[get_local_id(0)].xyz;
forceBuffers[offset] = of;
#else
of.xyz = af.xyz;
of.w = 0.0f;
unsigned int offset = x + tgx + (y/TileSize) * paddedNumAtoms;
forceBuffers[offset] = of;
of = local_force[get_local_id(0)];
offset = y + tgx + (x/TileSize) * paddedNumAtoms;
forceBuffers[offset] = of;
#endif
lasty = y;
}
pos++;
}
energyBuffer[get_global_id(0)] += energy;
}
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment