Commit 40891bab authored by Peter Eastman
Browse files

Optimization to OpenCL platform: use tabulated values for erfc()

parent 3f2c29fb
...@@ -214,8 +214,8 @@ void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) { ...@@ -214,8 +214,8 @@ void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
clearBuffer(array.getDeviceBuffer(), array.getSize()*4); clearBuffer(array.getDeviceBuffer(), array.getSize()*4);
} }
void OpenCLContext::clearBuffer(cl::Buffer& buffer, int size) { void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
clearBufferKernel.setArg<cl::Buffer>(0, buffer); clearBufferKernel.setArg<cl::Memory>(0, memory);
clearBufferKernel.setArg<cl_int>(1, size); clearBufferKernel.setArg<cl_int>(1, size);
executeKernel(clearBufferKernel, size); executeKernel(clearBufferKernel, size);
} }
......
...@@ -230,10 +230,10 @@ public: ...@@ -230,10 +230,10 @@ public:
/** /**
* Set all elements of an array to 0. * Set all elements of an array to 0.
* *
* @param buffer the Buffer to clear * @param memory the Memory to clear
* @param size the number of float elements in the buffer * @param size the number of float elements in the buffer
*/ */
void clearBuffer(cl::Buffer& buffer, int size); void clearBuffer(cl::Memory& memory, int size);
/** /**
* Given a collection of buffers packed into an array, sum them and store * Given a collection of buffers packed into an array, sum them and store
* the sum in the first buffer. * the sum in the first buffer.
......
...@@ -35,10 +35,11 @@ ...@@ -35,10 +35,11 @@
#include "OpenCLIntegrationUtilities.h" #include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h" #include "OpenCLNonbondedUtilities.h"
#include "OpenCLKernelSources.h" #include "OpenCLKernelSources.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h" #include "lepton/Parser.h"
#include "lepton/ParsedExpression.h" #include "lepton/ParsedExpression.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h" #include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "lepton/Operation.h" #include "openmm/internal/MSVC_erfc.h"
#include <cmath> #include <cmath>
#include <set> #include <set>
...@@ -390,7 +391,7 @@ void OpenCLCalcCustomBondForceKernel::executeForces(ContextImpl& context) { ...@@ -390,7 +391,7 @@ void OpenCLCalcCustomBondForceKernel::executeForces(ContextImpl& context) {
kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer()); kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg<cl::Buffer>(nextIndex++, buffer.getBuffer()); kernel.setArg<cl::Memory>(nextIndex++, buffer.getMemory());
} }
} }
cl.executeKernel(kernel, numBonds); cl.executeKernel(kernel, numBonds);
...@@ -625,7 +626,7 @@ void OpenCLCalcCustomAngleForceKernel::executeForces(ContextImpl& context) { ...@@ -625,7 +626,7 @@ void OpenCLCalcCustomAngleForceKernel::executeForces(ContextImpl& context) {
kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer()); kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg<cl::Buffer>(nextIndex++, buffer.getBuffer()); kernel.setArg<cl::Memory>(nextIndex++, buffer.getMemory());
} }
} }
cl.executeKernel(kernel, numAngles); cl.executeKernel(kernel, numAngles);
...@@ -947,7 +948,7 @@ void OpenCLCalcCustomTorsionForceKernel::executeForces(ContextImpl& context) { ...@@ -947,7 +948,7 @@ void OpenCLCalcCustomTorsionForceKernel::executeForces(ContextImpl& context) {
kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer()); kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg<cl::Buffer>(nextIndex++, buffer.getBuffer()); kernel.setArg<cl::Memory>(nextIndex++, buffer.getMemory());
} }
} }
cl.executeKernel(kernel, numTorsions); cl.executeKernel(kernel, numTorsions);
...@@ -1015,6 +1016,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() { ...@@ -1015,6 +1016,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete pmeAtomRange; delete pmeAtomRange;
if (pmeAtomGridIndex != NULL) if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex; delete pmeAtomGridIndex;
if (erfcTable != NULL)
delete erfcTable;
if (sort != NULL) if (sort != NULL)
delete sort; delete sort;
if (fft != NULL) if (fft != NULL)
...@@ -1079,16 +1082,15 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1079,16 +1082,15 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK); defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC); defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC);
} }
double alpha = 0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald) { if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// Compute the Ewald parameters. // Compute the Ewald parameters.
double alpha;
int kmaxx, kmaxy, kmaxz; int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz); NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = doubleToString(alpha); defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
double selfEnergyScale = ONE_4PI_EPS0*alpha/std::sqrt(M_PI);
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI); ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI);
// Create the reciprocal space kernels. // Create the reciprocal space kernels.
...@@ -1111,7 +1113,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1111,7 +1113,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else if (force.getNonbondedMethod() == NonbondedForce::PME) { else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters. // Compute the PME parameters.
double alpha;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ); NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX); gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
...@@ -1120,7 +1121,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1120,7 +1121,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["EWALD_ALPHA"] = doubleToString(alpha); defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
double selfEnergyScale = ONE_4PI_EPS0*alpha/std::sqrt(M_PI);
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI); ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI);
pmeDefines["PME_ORDER"] = intToString(PmeOrder); pmeDefines["PME_ORDER"] = intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = intToString(numParticles); pmeDefines["NUM_ATOMS"] = intToString(numParticles);
...@@ -1205,6 +1205,37 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1205,6 +1205,37 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
} }
else else
ewaldSelfEnergy = 0.0; ewaldSelfEnergy = 0.0;
// Tabulate values of erfc().
if (force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME) {
if (cl.getDevice().getInfo<CL_DEVICE_IMAGE_SUPPORT>()) {
try
{
const int tableSize = 2048;
defines["USE_TABULATED_ERFC"] = "1";
defines["ERFC_TABLE_SCALE"] = doubleToString((tableSize-1)/(alpha*force.getCutoffDistance()));
erfcTable = new cl::Image2D(cl.getContext(), CL_MEM_READ_ONLY, cl::ImageFormat(CL_INTENSITY, CL_FLOAT), tableSize, 1, 0);
vector<cl_float> erfcVector(tableSize);
for (int i = 0; i < tableSize; ++i)
erfcVector[i] = (float) erfc(i*(alpha*force.getCutoffDistance())/(tableSize-1));
cl::size_t<3> origin, region;
origin.push_back(0);
origin.push_back(0);
origin.push_back(0);
region.push_back(tableSize);
region.push_back(1);
region.push_back(1);
cl.getQueue().enqueueWriteImage(*erfcTable, CL_TRUE, origin, region, 0, 0, &erfcVector[0]);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo("erfcTable", "image2d_t", sizeof(cl_float), *erfcTable));
}
catch (cl::Error err) {
std::stringstream str;
str<<"Error creating erfc() image: "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
}
// Add the interaction to the default nonbonded kernel. // Add the interaction to the default nonbonded kernel.
...@@ -1466,7 +1497,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons ...@@ -1466,7 +1497,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source); cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source);
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+intToString(i+1), buffer.getType(), buffer.getSize(), buffer.getBuffer())); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+intToString(i+1), buffer.getType(), buffer.getSize(), buffer.getMemory()));
} }
if (globals != NULL) { if (globals != NULL) {
globals->upload(globalParamValues); globals->upload(globalParamValues);
...@@ -2149,17 +2180,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -2149,17 +2180,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = prefix+"params"+intToString(i+1); string paramName = prefix+"params"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer())); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getMemory()));
} }
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string paramName = prefix+"values"+intToString(i+1); string paramName = prefix+"values"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer())); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getMemory()));
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string paramName = prefix+"dEdV"+intToString(i+1); string paramName = prefix+"dEdV"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer())); cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getMemory()));
} }
if (globals != NULL) { if (globals != NULL) {
globals->upload(globalParamValues); globals->upload(globalParamValues);
...@@ -2195,7 +2226,7 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) { ...@@ -2195,7 +2226,7 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
pairValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
pairValueKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); pairValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
pairValueKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL); pairValueKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
} }
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
...@@ -2210,9 +2241,9 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) { ...@@ -2210,9 +2241,9 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
if (globals != NULL) if (globals != NULL)
perParticleValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); perParticleValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) for (int i = 0; i < (int) params->getBuffers().size(); i++)
perParticleValueKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer()); perParticleValueKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleValueKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer()); perParticleValueKernel.setArg<cl::Memory>(index++, computedValues->getBuffers()[i].getMemory());
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleValueKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer()); perParticleValueKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
...@@ -2240,17 +2271,17 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) { ...@@ -2240,17 +2271,17 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
pairEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); pairEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL); pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
} }
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) { for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); pairEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL); pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); pairEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL); pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
} }
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
...@@ -2265,11 +2296,11 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) { ...@@ -2265,11 +2296,11 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
if (globals != NULL) if (globals != NULL)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) for (int i = 0; i < (int) params->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer()); perParticleEnergyKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer()); perParticleEnergyKernel.setArg<cl::Memory>(index++, computedValues->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, energyDerivs->getBuffers()[i].getBuffer()); perParticleEnergyKernel.setArg<cl::Memory>(index++, energyDerivs->getBuffers()[i].getMemory());
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
...@@ -2290,7 +2321,7 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) { ...@@ -2290,7 +2321,7 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
cl.clearBuffer(*valueBuffers); cl.clearBuffer(*valueBuffers);
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) { for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
cl.clearBuffer(buffer.getBuffer(), buffer.getSize()*energyDerivs->getNumObjects()/sizeof(cl_float)); cl.clearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects()/sizeof(cl_float));
} }
cl.executeKernel(pairValueKernel, nb.getTiles().getSize()*OpenCLContext::TileSize); cl.executeKernel(pairValueKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(perParticleValueKernel, cl.getPaddedNumAtoms()); cl.executeKernel(perParticleValueKernel, cl.getPaddedNumAtoms());
...@@ -2440,7 +2471,7 @@ void OpenCLCalcCustomExternalForceKernel::executeForces(ContextImpl& context) { ...@@ -2440,7 +2471,7 @@ void OpenCLCalcCustomExternalForceKernel::executeForces(ContextImpl& context) {
kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer()); kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg<cl::Buffer>(nextIndex++, buffer.getBuffer()); kernel.setArg<cl::Memory>(nextIndex++, buffer.getMemory());
} }
} }
cl.executeKernel(kernel, numParticles); cl.executeKernel(kernel, numParticles);
...@@ -2905,11 +2936,11 @@ void OpenCLCalcCustomHbondForceKernel::executeForces(ContextImpl& context) { ...@@ -2905,11 +2936,11 @@ void OpenCLCalcCustomHbondForceKernel::executeForces(ContextImpl& context) {
donorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
donorKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
} }
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
donorKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
} }
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
...@@ -2929,11 +2960,11 @@ void OpenCLCalcCustomHbondForceKernel::executeForces(ContextImpl& context) { ...@@ -2929,11 +2960,11 @@ void OpenCLCalcCustomHbondForceKernel::executeForces(ContextImpl& context) {
acceptorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
acceptorKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
} }
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i]; const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
acceptorKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
} }
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
......
...@@ -438,7 +438,7 @@ public: ...@@ -438,7 +438,7 @@ public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcNonbondedForceKernel(name, platform), OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcNonbondedForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), exceptionIndices(NULL), cosSinSums(NULL), pmeGrid(NULL), hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), exceptionIndices(NULL), cosSinSums(NULL), pmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeBsplineDtheta(NULL), pmeAtomRange(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeBsplineDtheta(NULL), pmeAtomRange(NULL),
pmeAtomGridIndex(NULL), sort(NULL), fft(NULL) { pmeAtomGridIndex(NULL), erfcTable(NULL), sort(NULL), fft(NULL) {
} }
~OpenCLCalcNonbondedForceKernel(); ~OpenCLCalcNonbondedForceKernel();
/** /**
...@@ -478,6 +478,7 @@ private: ...@@ -478,6 +478,7 @@ private:
OpenCLArray<mm_float2>* pmeAtomGridIndex; OpenCLArray<mm_float2>* pmeAtomGridIndex;
OpenCLSort<mm_float2>* sort; OpenCLSort<mm_float2>* sort;
OpenCLFFT3D* fft; OpenCLFFT3D* fft;
cl::Image2D* erfcTable;
cl::Kernel exceptionsKernel; cl::Kernel exceptionsKernel;
cl::Kernel ewaldSumsKernel; cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel; cl::Kernel ewaldForcesKernel;
......
...@@ -294,13 +294,19 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -294,13 +294,19 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
args << params[i].getName(); args << params[i].getName();
} }
for (int i = 0; i < (int) arguments.size(); i++) { for (int i = 0; i < (int) arguments.size(); i++) {
if ((arguments[i].getBuffer().getInfo<CL_MEM_FLAGS>() & CL_MEM_READ_ONLY) == 0) if (arguments[i].getMemory().getInfo<CL_MEM_TYPE>() == CL_MEM_OBJECT_IMAGE2D) {
args << ", __global "; args << ", __read_only image2d_t ";
else args << arguments[i].getName();
args << ", __constant "; }
args << arguments[i].getType(); else {
args << "* "; if ((arguments[i].getMemory().getInfo<CL_MEM_FLAGS>() & CL_MEM_READ_ONLY) == 0)
args << arguments[i].getName(); args << ", __global ";
else
args << ", __constant ";
args << arguments[i].getType();
args << "* ";
args << arguments[i].getName();
}
} }
replacements["PARAMETER_ARGUMENTS"] = args.str(); replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream loadLocal1; stringstream loadLocal1;
...@@ -388,12 +394,12 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -388,12 +394,12 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel.setArg<cl_uint>(9, tiles->getSize()); kernel.setArg<cl_uint>(9, tiles->getSize());
} }
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
kernel.setArg<cl::Buffer>(i*2+paramBase, params[i].getBuffer()); kernel.setArg<cl::Memory>(i*2+paramBase, params[i].getMemory());
kernel.setArg(i*2+paramBase+1, OpenCLContext::ThreadBlockSize*params[i].getSize(), NULL); kernel.setArg(i*2+paramBase+1, OpenCLContext::ThreadBlockSize*params[i].getSize(), NULL);
} }
paramBase += 2*params.size(); paramBase += 2*params.size();
for (int i = 0; i < (int) arguments.size(); i++) { for (int i = 0; i < (int) arguments.size(); i++) {
kernel.setArg<cl::Buffer>(i+paramBase, arguments[i].getBuffer()); kernel.setArg<cl::Memory>(i+paramBase, arguments[i].getMemory());
} }
return kernel; return kernel;
} }
...@@ -231,10 +231,10 @@ public: ...@@ -231,10 +231,10 @@ public:
* @param name the name of the parameter * @param name the name of the parameter
* @param type the data type of the parameter * @param type the data type of the parameter
* @param size the size of the parameter in bytes * @param size the size of the parameter in bytes
* @param buffer the buffer containing the parameter values * @param memory the memory containing the parameter values
*/ */
ParameterInfo(const std::string& name, const std::string& type, int size, cl::Buffer& buffer) : ParameterInfo(const std::string& name, const std::string& type, int size, cl::Memory& memory) :
name(name), type(type), size(size), buffer(&buffer) { name(name), type(type), size(size), memory(&memory) {
} }
const std::string& getName() const { const std::string& getName() const {
return name; return name;
...@@ -245,14 +245,14 @@ public: ...@@ -245,14 +245,14 @@ public:
int getSize() const { int getSize() const {
return size; return size;
} }
cl::Buffer& getBuffer() const { cl::Memory& getMemory() const {
return *buffer; return *memory;
} }
private: private:
std::string name; std::string name;
std::string type; std::string type;
int size; int size;
cl::Buffer* buffer; cl::Memory* memory;
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -67,7 +67,7 @@ OpenCLParameterSet::OpenCLParameterSet(OpenCLContext& context, int numParameters ...@@ -67,7 +67,7 @@ OpenCLParameterSet::OpenCLParameterSet(OpenCLContext& context, int numParameters
OpenCLParameterSet::~OpenCLParameterSet() { OpenCLParameterSet::~OpenCLParameterSet() {
for (int i = 0; i < (int) buffers.size(); i++) for (int i = 0; i < (int) buffers.size(); i++)
delete &buffers[i].getBuffer(); delete &buffers[i].getMemory();
} }
void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) const { void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) const {
...@@ -79,7 +79,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c ...@@ -79,7 +79,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c
for (int i = 0; i < (int) buffers.size(); i++) { for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getType() == "float4") { if (buffers[i].getType() == "float4") {
vector<mm_float4> data(numObjects); vector<mm_float4> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]); context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) { for (int j = 0; j < numObjects; j++) {
values[j][base] = data[j].x; values[j][base] = data[j].x;
if (base+1 < numParameters) if (base+1 < numParameters)
...@@ -93,7 +93,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c ...@@ -93,7 +93,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c
} }
else if (buffers[i].getType() == "float2") { else if (buffers[i].getType() == "float2") {
vector<mm_float2> data(numObjects); vector<mm_float2> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]); context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) { for (int j = 0; j < numObjects; j++) {
values[j][base] = data[j].x; values[j][base] = data[j].x;
if (base+1 < numParameters) if (base+1 < numParameters)
...@@ -103,7 +103,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c ...@@ -103,7 +103,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c
} }
else if (buffers[i].getType() == "float") { else if (buffers[i].getType() == "float") {
vector<cl_float> data(numObjects); vector<cl_float> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]); context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) for (int j = 0; j < numObjects; j++)
values[j][base] = data[j]; values[j][base] = data[j];
} }
...@@ -133,7 +133,7 @@ void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& val ...@@ -133,7 +133,7 @@ void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& val
if (base+3 < numParameters) if (base+3 < numParameters)
data[j].w = values[j][base+3]; data[j].w = values[j][base+3];
} }
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]); context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 4; base += 4;
} }
else if (buffers[i].getType() == "float2") { else if (buffers[i].getType() == "float2") {
...@@ -143,14 +143,14 @@ void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& val ...@@ -143,14 +143,14 @@ void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& val
if (base+1 < numParameters) if (base+1 < numParameters)
data[j].y = values[j][base+1]; data[j].y = values[j][base+1];
} }
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]); context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 2; base += 2;
} }
else if (buffers[i].getType() == "float") { else if (buffers[i].getType() == "float") {
vector<cl_float> data(numObjects); vector<cl_float> data(numObjects);
for (int j = 0; j < numObjects; j++) for (int j = 0; j < numObjects; j++)
data[j] = values[j][base]; data[j] = values[j][base];
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]); context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
} }
else else
throw OpenMMException("Internal error: Unknown buffer type in OpenCLParameterSet"); throw OpenMMException("Internal error: Unknown buffer type in OpenCLParameterSet");
......
...@@ -3,7 +3,15 @@ bool needCorrection = isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2 ...@@ -3,7 +3,15 @@ bool needCorrection = isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2
if (!isExcluded || needCorrection) { if (!isExcluded || needCorrection) {
const float prefactor = 138.935456f*posq1.w*posq2.w*invR; const float prefactor = 138.935456f*posq1.w*posq2.w*invR;
float alphaR = EWALD_ALPHA*r; float alphaR = EWALD_ALPHA*r;
#ifdef USE_TABULATED_ERFC
float normalized = ERFC_TABLE_SCALE*alphaR;
int tableIndex = (int) normalized;
float fract2 = normalized-tableIndex;
float fract1 = 1.0f-fract2;
float erfcAlphaR = fract1*read_imagef(erfcTable, sampler, (int2)(tableIndex, 0)).x + fract2*read_imagef(erfcTable, sampler, (int2)(tableIndex+1, 0)).x;
#else
float erfcAlphaR = erfc(alphaR); float erfcAlphaR = erfc(alphaR);
#endif
float tempForce = 0.0f; float tempForce = 0.0f;
if (needCorrection) { if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
......
#define TILE_SIZE 32 #define TILE_SIZE 32
// Sampler for image reads: coordinates are unnormalized (texel indices),
// out-of-range coordinates are clamped to the edge, and the nearest texel is
// returned without interpolation.
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
/** /**
* Compute nonbonded interactions. * Compute nonbonded interactions.
......
#define TILE_SIZE 32 #define TILE_SIZE 32
// Sampler for image reads: coordinates are unnormalized (texel indices),
// out-of-range coordinates are clamped to the edge, and the nearest texel is
// returned without interpolation.
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
/** /**
* Compute nonbonded interactions. * Compute nonbonded interactions.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment