Commit 40891bab authored by Peter Eastman
Browse files

Optimization to OpenCL platform: use tabulated values for erfc()

parent 3f2c29fb
......@@ -214,8 +214,8 @@ void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
clearBuffer(array.getDeviceBuffer(), array.getSize()*4);
}
// Set the elements of a device memory object to 0: bind the memory object and
// the element count to clearBufferKernel (arguments 0 and 1), then launch it
// with executeKernel(clearBufferKernel, size).
void OpenCLContext::clearBuffer(cl::Buffer& buffer, int size) {
clearBufferKernel.setArg<cl::Buffer>(0, buffer);
void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
clearBufferKernel.setArg<cl::Memory>(0, memory);
clearBufferKernel.setArg<cl_int>(1, size);
executeKernel(clearBufferKernel, size);
}
......
......@@ -230,10 +230,10 @@ public:
/**
* Set all elements of an array to 0.
*
* @param buffer the Buffer to clear
* @param memory the Memory to clear
* @param size the number of float elements in the buffer
*/
void clearBuffer(cl::Buffer& buffer, int size);
void clearBuffer(cl::Memory& memory, int size);
/**
* Given a collection of buffers packed into an array, sum them and store
* the sum in the first buffer.
......
......@@ -35,10 +35,11 @@
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLKernelSources.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "lepton/Operation.h"
#include "openmm/internal/MSVC_erfc.h"
#include <cmath>
#include <set>
......@@ -390,7 +391,7 @@ void OpenCLCalcCustomBondForceKernel::executeForces(ContextImpl& context) {
kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg<cl::Buffer>(nextIndex++, buffer.getBuffer());
kernel.setArg<cl::Memory>(nextIndex++, buffer.getMemory());
}
}
cl.executeKernel(kernel, numBonds);
......@@ -625,7 +626,7 @@ void OpenCLCalcCustomAngleForceKernel::executeForces(ContextImpl& context) {
kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg<cl::Buffer>(nextIndex++, buffer.getBuffer());
kernel.setArg<cl::Memory>(nextIndex++, buffer.getMemory());
}
}
cl.executeKernel(kernel, numAngles);
......@@ -947,7 +948,7 @@ void OpenCLCalcCustomTorsionForceKernel::executeForces(ContextImpl& context) {
kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg<cl::Buffer>(nextIndex++, buffer.getBuffer());
kernel.setArg<cl::Memory>(nextIndex++, buffer.getMemory());
}
}
cl.executeKernel(kernel, numTorsions);
......@@ -1015,6 +1016,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
delete pmeAtomGridIndex;
if (erfcTable != NULL)
delete erfcTable;
if (sort != NULL)
delete sort;
if (fft != NULL)
......@@ -1079,16 +1082,15 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["REACTION_FIELD_K"] = doubleToString(reactionFieldK);
defines["REACTION_FIELD_C"] = doubleToString(reactionFieldC);
}
double alpha = 0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// Compute the Ewald parameters.
double alpha;
int kmaxx, kmaxy, kmaxz;
NonbondedForceImpl::calcEwaldParameters(system, force, alpha, kmaxx, kmaxy, kmaxz);
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
double selfEnergyScale = ONE_4PI_EPS0*alpha/std::sqrt(M_PI);
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI);
// Create the reciprocal space kernels.
......@@ -1111,7 +1113,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters.
double alpha;
int gridSizeX, gridSizeY, gridSizeZ;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
......@@ -1120,7 +1121,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["EWALD_ALPHA"] = doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
double selfEnergyScale = ONE_4PI_EPS0*alpha/std::sqrt(M_PI);
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/std::sqrt(M_PI);
pmeDefines["PME_ORDER"] = intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = intToString(numParticles);
......@@ -1205,6 +1205,37 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
}
else
ewaldSelfEnergy = 0.0;
// Tabulate values of erfc() so the Ewald/PME direct-space code can look them up
// in an image instead of calling erfc().  The table is only built when the
// device reports image support; this block does not define USE_TABULATED_ERFC
// in any other case.
if ((force.getNonbondedMethod() == NonbondedForce::Ewald || force.getNonbondedMethod() == NonbondedForce::PME)
        && cl.getDevice().getInfo<CL_DEVICE_IMAGE_SUPPORT>()) {
    try {
        const int tableSize = 2048;
        // The table samples erfc(x) at tableSize evenly spaced points on
        // [0, alpha*cutoff]; ERFC_TABLE_SCALE converts x into a table index.
        const double tableRange = alpha*force.getCutoffDistance();
        defines["USE_TABULATED_ERFC"] = "1";
        defines["ERFC_TABLE_SCALE"] = doubleToString((tableSize-1)/tableRange);
        erfcTable = new cl::Image2D(cl.getContext(), CL_MEM_READ_ONLY, cl::ImageFormat(CL_INTENSITY, CL_FLOAT), tableSize, 1, 0);
        vector<cl_float> erfcVector(tableSize);
        for (int i = 0; i < tableSize; ++i)
            erfcVector[i] = static_cast<cl_float>(erfc(i*tableRange/(tableSize-1)));
        // Upload the whole tableSize x 1 image, starting at the image origin.
        cl::size_t<3> origin, region;
        origin.push_back(0);
        origin.push_back(0);
        origin.push_back(0);
        region.push_back(tableSize);
        region.push_back(1);
        region.push_back(1);
        cl.getQueue().enqueueWriteImage(*erfcTable, CL_TRUE, origin, region, 0, 0, &erfcVector[0]);
        cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo("erfcTable", "image2d_t", sizeof(cl_float), *erfcTable));
    }
    catch (const cl::Error& err) {
        // Catch by const reference (no copy, no slicing) and rethrow as an
        // OpenMMException carrying the OpenCL error text and code.
        std::stringstream str;
        str<<"Error creating erfc() image: "<<err.what()<<" ("<<err.err()<<")";
        throw OpenMMException(str.str());
    }
}
// Add the interaction to the default nonbonded kernel.
......@@ -1466,7 +1497,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source);
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+intToString(i+1), buffer.getType(), buffer.getSize(), buffer.getBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+intToString(i+1), buffer.getType(), buffer.getSize(), buffer.getMemory()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
......@@ -2149,17 +2180,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = prefix+"params"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getMemory()));
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string paramName = prefix+"values"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getMemory()));
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string paramName = prefix+"dEdV"+intToString(i+1);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(paramName, buffer.getType(), buffer.getSize(), buffer.getMemory()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
......@@ -2195,7 +2226,7 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
pairValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
pairValueKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairValueKernel.setArg<cl::Memory>(index++, buffer.getMemory());
pairValueKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
if (tabulatedFunctionParams != NULL) {
......@@ -2210,9 +2241,9 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
if (globals != NULL)
perParticleValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
perParticleValueKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer());
perParticleValueKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleValueKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer());
perParticleValueKernel.setArg<cl::Memory>(index++, computedValues->getBuffers()[i].getMemory());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleValueKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
......@@ -2240,17 +2271,17 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
pairEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg<cl::Memory>(index++, buffer.getMemory());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
if (tabulatedFunctionParams != NULL) {
......@@ -2265,11 +2296,11 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
if (globals != NULL)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer());
perParticleEnergyKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer());
perParticleEnergyKernel.setArg<cl::Memory>(index++, computedValues->getBuffers()[i].getMemory());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, energyDerivs->getBuffers()[i].getBuffer());
perParticleEnergyKernel.setArg<cl::Memory>(index++, energyDerivs->getBuffers()[i].getMemory());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
......@@ -2290,7 +2321,7 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
cl.clearBuffer(*valueBuffers);
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
cl.clearBuffer(buffer.getBuffer(), buffer.getSize()*energyDerivs->getNumObjects()/sizeof(cl_float));
cl.clearBuffer(buffer.getMemory(), buffer.getSize()*energyDerivs->getNumObjects()/sizeof(cl_float));
}
cl.executeKernel(pairValueKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(perParticleValueKernel, cl.getPaddedNumAtoms());
......@@ -2440,7 +2471,7 @@ void OpenCLCalcCustomExternalForceKernel::executeForces(ContextImpl& context) {
kernel.setArg<cl::Buffer>(nextIndex++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
kernel.setArg<cl::Buffer>(nextIndex++, buffer.getBuffer());
kernel.setArg<cl::Memory>(nextIndex++, buffer.getMemory());
}
}
cl.executeKernel(kernel, numParticles);
......@@ -2905,11 +2936,11 @@ void OpenCLCalcCustomHbondForceKernel::executeForces(ContextImpl& context) {
donorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
donorKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
}
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
donorKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
donorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
}
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
......@@ -2929,11 +2960,11 @@ void OpenCLCalcCustomHbondForceKernel::executeForces(ContextImpl& context) {
acceptorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = donorParams->getBuffers()[i];
acceptorKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
}
for (int i = 0; i < (int) acceptorParams->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = acceptorParams->getBuffers()[i];
acceptorKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
acceptorKernel.setArg<cl::Memory>(index++, buffer.getMemory());
}
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
......
......@@ -438,7 +438,7 @@ public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcNonbondedForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), exceptionIndices(NULL), cosSinSums(NULL), pmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeBsplineDtheta(NULL), pmeAtomRange(NULL),
pmeAtomGridIndex(NULL), sort(NULL), fft(NULL) {
pmeAtomGridIndex(NULL), erfcTable(NULL), sort(NULL), fft(NULL) {
}
~OpenCLCalcNonbondedForceKernel();
/**
......@@ -478,6 +478,7 @@ private:
OpenCLArray<mm_float2>* pmeAtomGridIndex;
// Image holding the tabulated erfc() values.  Declared before sort and fft so
// that the member order matches the constructor's initializer list
// (..., pmeAtomGridIndex, erfcTable, sort, fft) and no reorder warning is raised.
cl::Image2D* erfcTable;
OpenCLSort<mm_float2>* sort;
OpenCLFFT3D* fft;
cl::Kernel exceptionsKernel;
cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel;
......
......@@ -294,13 +294,19 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
args << params[i].getName();
}
for (int i = 0; i < (int) arguments.size(); i++) {
if ((arguments[i].getBuffer().getInfo<CL_MEM_FLAGS>() & CL_MEM_READ_ONLY) == 0)
args << ", __global ";
else
args << ", __constant ";
args << arguments[i].getType();
args << "* ";
args << arguments[i].getName();
if (arguments[i].getMemory().getInfo<CL_MEM_TYPE>() == CL_MEM_OBJECT_IMAGE2D) {
args << ", __read_only image2d_t ";
args << arguments[i].getName();
}
else {
if ((arguments[i].getMemory().getInfo<CL_MEM_FLAGS>() & CL_MEM_READ_ONLY) == 0)
args << ", __global ";
else
args << ", __constant ";
args << arguments[i].getType();
args << "* ";
args << arguments[i].getName();
}
}
replacements["PARAMETER_ARGUMENTS"] = args.str();
stringstream loadLocal1;
......@@ -388,12 +394,12 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel.setArg<cl_uint>(9, tiles->getSize());
}
for (int i = 0; i < (int) params.size(); i++) {
kernel.setArg<cl::Buffer>(i*2+paramBase, params[i].getBuffer());
kernel.setArg<cl::Memory>(i*2+paramBase, params[i].getMemory());
kernel.setArg(i*2+paramBase+1, OpenCLContext::ThreadBlockSize*params[i].getSize(), NULL);
}
paramBase += 2*params.size();
for (int i = 0; i < (int) arguments.size(); i++) {
kernel.setArg<cl::Buffer>(i+paramBase, arguments[i].getBuffer());
kernel.setArg<cl::Memory>(i+paramBase, arguments[i].getMemory());
}
return kernel;
}
......@@ -231,10 +231,10 @@ public:
* @param name the name of the parameter
* @param type the data type of the parameter
* @param size the size of the parameter in bytes
* @param buffer the buffer containing the parameter values
* @param memory the memory containing the parameter values
*/
ParameterInfo(const std::string& name, const std::string& type, int size, cl::Buffer& buffer) :
name(name), type(type), size(size), buffer(&buffer) {
ParameterInfo(const std::string& name, const std::string& type, int size, cl::Memory& memory) :
name(name), type(type), size(size), memory(&memory) {
}
const std::string& getName() const {
return name;
......@@ -245,14 +245,14 @@ public:
/**
 * Get the size of the parameter in bytes, as passed to the constructor.
 */
int getSize() const {
return size;
}
/**
 * Get the memory object containing the parameter values, i.e. the object
 * whose address was stored by the constructor.
 */
cl::Buffer& getBuffer() const {
return *buffer;
cl::Memory& getMemory() const {
return *memory;
}
private:
std::string name;
std::string type;
int size;
cl::Buffer* buffer;
cl::Memory* memory;
};
} // namespace OpenMM
......
......@@ -67,7 +67,7 @@ OpenCLParameterSet::OpenCLParameterSet(OpenCLContext& context, int numParameters
/**
 * Delete the memory object that each entry of `buffers` refers to.
 */
OpenCLParameterSet::~OpenCLParameterSet() {
for (int i = 0; i < (int) buffers.size(); i++)
delete &buffers[i].getBuffer();
// NOTE(review): the object is now deleted through a cl::Memory reference. If it
// was allocated as a cl::Buffer (allocation is not visible here), confirm that
// deleting via the base wrapper type is well-defined for these classes.
delete &buffers[i].getMemory();
}
void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) const {
......@@ -79,7 +79,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getType() == "float4") {
vector<mm_float4> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) {
values[j][base] = data[j].x;
if (base+1 < numParameters)
......@@ -93,7 +93,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c
}
else if (buffers[i].getType() == "float2") {
vector<mm_float2> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) {
values[j][base] = data[j].x;
if (base+1 < numParameters)
......@@ -103,7 +103,7 @@ void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) c
}
else if (buffers[i].getType() == "float") {
vector<cl_float> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
context.getQueue().enqueueReadBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++)
values[j][base] = data[j];
}
......@@ -133,7 +133,7 @@ void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& val
if (base+3 < numParameters)
data[j].w = values[j][base+3];
}
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 4;
}
else if (buffers[i].getType() == "float2") {
......@@ -143,14 +143,14 @@ void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& val
if (base+1 < numParameters)
data[j].y = values[j][base+1];
}
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 2;
}
else if (buffers[i].getType() == "float") {
vector<cl_float> data(numObjects);
for (int j = 0; j < numObjects; j++)
data[j] = values[j][base];
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
context.getQueue().enqueueWriteBuffer(reinterpret_cast<cl::Buffer&>(buffers[i].getMemory()), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
}
else
throw OpenMMException("Internal error: Unknown buffer type in OpenCLParameterSet");
......
......@@ -3,7 +3,15 @@ bool needCorrection = isExcluded && atom1 != atom2 && atom1 < NUM_ATOMS && atom2
if (!isExcluded || needCorrection) {
const float prefactor = 138.935456f*posq1.w*posq2.w*invR;
float alphaR = EWALD_ALPHA*r;
#ifdef USE_TABULATED_ERFC
// Look up erfc(alphaR) in the erfcTable image, blending entries tableIndex and
// tableIndex+1 linearly according to the fractional part of the table position.
float normalized = ERFC_TABLE_SCALE*alphaR; // position in table-index units
int tableIndex = (int) normalized; // lower of the two entries that are read
float fract2 = normalized-tableIndex; // weight applied to entry tableIndex+1
float fract1 = 1.0f-fract2; // weight applied to entry tableIndex
float erfcAlphaR = fract1*read_imagef(erfcTable, sampler, (int2)(tableIndex, 0)).x + fract2*read_imagef(erfcTable, sampler, (int2)(tableIndex+1, 0)).x;
#else
// No table was requested by the host: evaluate erfc() directly.
float erfcAlphaR = erfc(alphaR);
#endif
float tempForce = 0.0f;
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
......
#define TILE_SIZE 32
// Sampler for image reads: unnormalized coordinates, clamp-to-edge addressing
// for out-of-range coordinates, and nearest filtering (no hardware interpolation).
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
/**
* Compute nonbonded interactions.
......
#define TILE_SIZE 32
// Sampler for image reads: unnormalized coordinates, clamp-to-edge addressing
// for out-of-range coordinates, and nearest filtering (no hardware interpolation).
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
/**
* Compute nonbonded interactions.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment