Commit 51fc3bd2 authored by Peter Eastman's avatar Peter Eastman
Browse files

Beginnings of OpenCL implementation of CustomGBForce

parent 7ded64ba
......@@ -318,3 +318,30 @@ void OpenCLExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node
for (int i = 0; i < (int) searchNode.getChildren().size(); i++)
findRelatedPowers(node, searchNode.getChildren()[i], powers);
}
vector<mm_float4> OpenCLExpressionUtilities::computeFunctionCoefficients(const vector<double>& values, bool interpolating) {
// First create a padded set of function values.
vector<double> padded(values.size()+2);
padded[0] = 2*values[0]-values[1];
for (int i = 0; i < (int) values.size(); i++)
padded[i+1] = values[i];
padded[padded.size()-1] = 2*values[values.size()-1]-values[values.size()-2];
// Now compute the spline coefficients.
vector<mm_float4> f(values.size()-1);
for (int i = 0; i < (int) values.size()-1; i++) {
if (interpolating)
f[i] = (mm_float4) {(cl_float) padded[i+1],
(cl_float) (0.5*(-padded[i]+padded[i+2])),
(cl_float) (0.5*(2.0*padded[i]-5.0*padded[i+1]+4.0*padded[i+2]-padded[i+3])),
(cl_float) (0.5*(-padded[i]+3.0*padded[i+1]-3.0*padded[i+2]+padded[i+3]))};
else
f[i] = (mm_float4) {(cl_float) ((padded[i]+4.0*padded[i+1]+padded[i+2])/6.0),
(cl_float) ((-3.0*padded[i]+3.0*padded[i+2])/6.0),
(cl_float) ((3.0*padded[i]-6.0*padded[i+1]+3.0*padded[i+2])/6.0),
(cl_float) ((-padded[i]+3.0*padded[i+1]-3.0*padded[i+2]+padded[i+3])/6.0)};
}
return f;
}
......@@ -27,6 +27,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLContext.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/ParsedExpression.h"
#include <map>
......@@ -54,6 +56,15 @@ public:
*/
static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams);
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
*
* @param values the tabulated values of the function
* @param interpolating true if an interpolating spline should be used, false if an approximating spline should be used
* @return the spline coefficients
*/
static std::vector<mm_float4> computeFunctionCoefficients(const std::vector<double>& values, bool interpolating);
class FunctionPlaceholder;
private:
static void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps, const std::map<std::string, std::string>& variables,
......@@ -66,6 +77,26 @@ private:
std::map<int, const Lepton::ExpressionTreeNode*>& powers);
};
/**
* This class serves as a placeholder for custom functions in expressions.
*/
class OpenCLExpressionUtilities::FunctionPlaceholder : public Lepton::CustomFunction {
public:
int getNumArguments() const {
return 1;
}
double evaluate(const double* arguments) const {
return 0.0;
}
double evaluateDerivative(const double* arguments, const int* derivOrder) const {
return 0.0;
}
CustomFunction* clone() const {
return new FunctionPlaceholder();
}
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLEXPRESSIONUTILITIES_H_*/
......@@ -54,6 +54,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
if (name == CalcGBSAOBCForceKernel::Name())
return new OpenCLCalcGBSAOBCForceKernel(name, platform, cl);
if (name == CalcCustomGBForceKernel::Name())
return new OpenCLCalcCustomGBForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomExternalForceKernel::Name())
return new OpenCLCalcCustomExternalForceKernel(name, platform, cl, context.getSystem());
if (name == IntegrateVerletStepKernel::Name())
......
This diff is collapsed.
......@@ -470,6 +470,50 @@ private:
cl::Kernel reduceBornForceKernel;
};
/**
* This kernel is invoked by CustomGBForce to calculate the forces acting on the system.
*/
class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public:
OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cl(cl), params(NULL), globals(NULL), valueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) {
}
~OpenCLCalcCustomGBForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomGBForce this kernel will be used for
*/
void initialize(const System& system, const CustomGBForce& force);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void executeForces(ContextImpl& context);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the CustomGBForce
*/
double executeEnergy(ContextImpl& context);
private:
bool hasInitializedKernels;
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLParameterSet* computedValues;
OpenCLArray<cl_float>* globals;
OpenCLArray<cl_float>* valueBuffers;
OpenCLArray<mm_float4>* tabulatedFunctionParams;
std::vector<std::string> globalParamNames;
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions;
System& system;
cl::Kernel pairValueKernel, reduceValueKernel, pairForceKernel, particleForceKernel;
};
/**
* This kernel is invoked by CustomExternalForce to calculate the forces acting on the system and the energy of the system.
*/
......
......@@ -70,21 +70,79 @@ OpenCLParameterSet::~OpenCLParameterSet() {
delete &buffers[i].getBuffer();
}
void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& values) {
void OpenCLParameterSet::getParameterValues(vector<vector<cl_float> >& values) const {
values.resize(numObjects);
for (int i = 0; i < numObjects; i++)
values[i].resize(numParameters);
try {
int base = 0;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getType() == "float4") {
vector<mm_float4> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) {
values[j][base] = data[j].x;
if (base+1 < numParameters)
values[j][base+1] = data[j].y;
if (base+2 < numParameters)
values[j][base+2] = data[j].z;
if (base+3 < numParameters)
values[j][base+3] = data[j].w;
}
base += 4;
}
else if (buffers[i].getType() == "float2") {
vector<mm_float2> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++) {
values[j][base] = data[j].x;
if (base+1 < numParameters)
values[j][base+1] = data[j].y;
}
base += 2;
}
else if (buffers[i].getType() == "float") {
vector<cl_float> data(numObjects);
context.getQueue().enqueueReadBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
for (int j = 0; j < numObjects; j++)
data[j] = (mm_float4) {values[j][base], values[j][base+1], values[j][base+2], values[j][base+3]};
values[j][base] = data[j];
}
else
throw OpenMMException("Internal error: Unknown buffer type in OpenCLParameterSet");
}
}
catch (cl::Error err) {
stringstream str;
str<<"Error downloading parameter set "<<name<<": "<<err.what()<<" ("<<err.err()<<")";
throw OpenMMException(str.str());
}
}
void OpenCLParameterSet::setParameterValues(const vector<vector<cl_float> >& values) {
try {
int base = 0;
for (int i = 0; i < (int) buffers.size(); i++) {
if (buffers[i].getType() == "float4") {
vector<mm_float4> data(numObjects);
for (int j = 0; j < numObjects; j++) {
data[j].x = values[j][base];
if (base+1 < numParameters)
data[j].y = values[j][base+1];
if (base+2 < numParameters)
data[j].z = values[j][base+2];
if (base+3 < numParameters)
data[j].w = values[j][base+3];
}
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 4;
}
else if (buffers[i].getType() == "float2") {
vector<mm_float2> data(numObjects);
for (int j = 0; j < numObjects; j++)
data[j] = (mm_float2) {values[j][base], values[j][base+1]};
for (int j = 0; j < numObjects; j++) {
data[j].x = values[j][base];
if (base+1 < numParameters)
data[j].y = values[j][base+1];
}
context.getQueue().enqueueWriteBuffer(buffers[i].getBuffer(), CL_TRUE, 0, numObjects*buffers[i].getSize(), &data[0]);
base += 2;
}
......
......@@ -64,6 +64,12 @@ public:
int getNumObjects() const {
return numObjects;
}
/**
* Get the values of all parameters.
*
* @param values on exit, values[i][j] contains the value of parameter j for object i
*/
void getParameterValues(std::vector<std::vector<cl_float> >& values) const;
/**
* Set the values of all parameters.
*
......
......@@ -54,6 +54,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
......
#define TILE_SIZE 32
/**
* Compute a value based on pair interactions.
*/
__kernel void computeN2Value(__global float4* posq, __local float4* local_posq, __global float* global_value,
__local float* local_value, __local float* tempBuffer, __global unsigned int* tiles,
#ifdef USE_CUTOFF
__global unsigned int* interactionFlags, __global unsigned int* interactionCount
#else
unsigned int numTiles
#endif
PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
#endif
unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE;
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
while (pos < end) {
// Extract the coordinates of this tile
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff)*TILE_SIZE;
bool hasExclusions = (x & 0x1);
x = (x>>17)*TILE_SIZE;
unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
unsigned int tbx = get_local_id(0) - tgx;
unsigned int atom1 = x + tgx;
float value = 0.0f;
float4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
if (x == y) {
// This tile is on the diagonal.
local_posq[get_local_id(0)] = posq1;
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned int xi = x/TILE_SIZE;
unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2;
#ifdef USE_EXCLUSIONS
unsigned int excl = exclusions[exclusionIndices[tile]+tgx];
#endif
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+j;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float tempValue1 = 0.0f;
float tempValue2 = 0.0f;
#ifdef USE_EXCLUSIONS
if (!isExcluded && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#else
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#endif
COMPUTE_VALUE
}
value += tempValue1;
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else
unsigned int offset = x + tgx + warp*PADDED_NUM_ATOMS;
#endif
global_value[offset] += value;
}
else {
// This is an off-diagonal tile.
if (lasty != y) {
unsigned int j = y + tgx;
local_posq[get_local_id(0)] = posq[j];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
local_value[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos];
if (!hasExclusions && flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float tempValue1 = 0.0f;
float tempValue2 = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_VALUE
}
value += tempValue1;
tempBuffer[get_local_id(0)] = tempValue2;
// Sum the forces on atom2.
if (tgx % 2 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
if (tgx % 4 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
if (tgx % 8 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
if (tgx % 16 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
if (tgx == 0)
local_value[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned int xi = x/TILE_SIZE;
unsigned int yi = y/TILE_SIZE;
unsigned int tile = xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*(yi+1)/2;
#ifdef USE_EXCLUSIONS
unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF);
excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
#endif
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+tj;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
float r = sqrt(r2);
LOAD_ATOM2_PARAMETERS
atom2 = y+tj;
float tempValue1 = 0.0f;
float tempValue2 = 0.0f;
#ifdef USE_EXCLUSIONS
if (!isExcluded && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#else
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif
COMPUTE_VALUE
}
value += tempValue1;
local_value[tbx+tj] += tempValue2;
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset1 = x + tgx + (y/TILE_SIZE)*PADDED_NUM_ATOMS;
unsigned int offset2 = y + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else
unsigned int offset1 = x + tgx + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = y + tgx + warp*PADDED_NUM_ATOMS;
#endif
global_value[offset1] += value;
global_value[offset2] += local_value[get_local_id(0)];
lasty = y;
}
pos++;
}
}
/**
* Reduce a pairwise computed value, and compute per-particle values.
*/
__kernel void reduceGBValue(int bufferSize, int numBuffers, __global float* valueBuffers
PARAMETER_ARGUMENTS) {
unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) {
// Reduce the pairwise value
int totalSize = bufferSize*numBuffers;
float sum = valueBuffers[index];
for (int i = index+bufferSize; i < totalSize; i += bufferSize)
sum += valueBuffers[i];
// Now calculate other values
COMPUTE_VALUES
index += get_global_size(0);
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment