Commit 7eec4930 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing OpenCL implementation of CustomGBForce

parent 9ed2c870
......@@ -198,15 +198,17 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
}
void OpenCLContext::clearBuffer(OpenCLArray<float>& array) {
clearBufferKernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
clearBufferKernel.setArg<cl_int>(1, array.getSize());
executeKernel(clearBufferKernel, array.getSize());
clearBuffer(array.getDeviceBuffer(), array.getSize());
}
void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
clearBufferKernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
clearBufferKernel.setArg<cl_int>(1, array.getSize()*4);
executeKernel(clearBufferKernel, array.getSize()*4);
clearBuffer(array.getDeviceBuffer(), array.getSize()*4);
}
void OpenCLContext::clearBuffer(cl::Buffer& buffer, int size) {
clearBufferKernel.setArg<cl::Buffer>(0, buffer);
clearBufferKernel.setArg<cl_int>(1, size);
executeKernel(clearBufferKernel, size);
}
void OpenCLContext::reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers) {
......
......@@ -188,6 +188,13 @@ public:
* Set all elements of an array to 0.
*/
void clearBuffer(OpenCLArray<mm_float4>& array);
/**
* Set all elements of an array to 0.
*
* @param buffer the Buffer to clear
* @param size the number of float elements in the buffer
*/
void clearBuffer(cl::Buffer& buffer, int size);
/**
* Given a collection of buffers packed into an array, sum them and store
* the sum in the first buffer.
......
......@@ -1219,8 +1219,12 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
delete params;
if (computedValues != NULL)
delete computedValues;
if (energyDerivs != NULL)
delete energyDerivs;
if (globals != NULL)
delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
......@@ -1303,7 +1307,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
tableArgs << ", __constant float4* " << prefix << "functionParams";
}
// Record information for the expressions.
// Record the global parameters.
vector<string> paramNames;
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
......@@ -1316,16 +1320,39 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
if (globals != NULL)
globals->upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic);
// Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
// Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize();
// map<string, Lepton::ParsedExpression> forceExpressions;
// forceExpressions["tempEnergy += "] = energyExpression;
// forceExpressions["tempForce -= "] = forceExpression;
//
// Record derivatives of expressions needed for the chain rule terms.
vector<Lepton::ParsedExpression> valueDerivExpressions;
vector<vector<Lepton::ParsedExpression> > energyDerivExpressions;
for (int i = 0; i < force.getNumComputedValues(); i++) {
Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
if (i == 0)
valueDerivExpressions.push_back(ex.differentiate("r").optimize());
else
valueDerivExpressions.push_back(ex.differentiate(computedValueNames[i-1]).optimize());
}
energyDerivExpressions.resize(force.getNumEnergyTerms());
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
Lepton::ParsedExpression ex = Lepton::Parser::parse(expression, functions).optimize();
for (int j = 0; j < force.getNumComputedValues(); j++) {
if (type == CustomGBForce::SingleParticle)
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize());
else {
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"1").optimize());
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"2").optimize());
}
}
}
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), "customGBEnergyDerivatives");
// Create the kernels.
bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic);
{
// Create the N2 value kernel.
......@@ -1453,7 +1480,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
map<string, Lepton::ParsedExpression> n2EnergyExpressions;
stringstream n2ForceSource;
stringstream n2EnergySource;
bool anyExclusions = false;
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
......@@ -1462,15 +1489,22 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (type == CustomGBForce::SingleParticle)
continue;
bool exclude = (type == CustomGBForce::ParticlePair);
anyExclusions != exclude;
string excludePrefix = (exclude ? "if (!isExcluded) " : "");
n2EnergyExpressions[excludePrefix+"tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize();
n2EnergyExpressions[excludePrefix+"dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize();
n2ForceSource << OpenCLExpressionUtilities::createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
anyExclusions |= exclude;
n2EnergyExpressions["tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize();
n2EnergyExpressions["dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize();
for (int j = 0; j < force.getNumComputedValues(); j++) {
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_1")+" += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_2")+" += "] = energyDerivExpressions[i][2*j+1];
}
if (exclude)
n2EnergySource << "if (!isExcluded) {\n";
n2EnergySource << OpenCLExpressionUtilities::createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
if (exclude)
n2EnergySource << "}\n";
}
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = n2ForceSource.str();
stringstream extraArgs, loadLocal1, loadLocal2, load1, load2;
replacements["COMPUTE_INTERACTION"] = n2EnergySource.str();
stringstream extraArgs, loadLocal1, loadLocal2, load1, load2, recordDeriv, storeDerivs1, storeDerivs2;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
......@@ -1491,11 +1525,25 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n";
load2 << buffer.getType() << " " << valueName << "2 = local_" << valueName << "[atom2];\n";
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index << ", __local " << buffer.getType() << "* local_deriv" << index;
loadLocal2 << "local_deriv" << index << "[get_local_id(0)] = 0.0f;\n";
load1 << buffer.getType() << " deriv" << index << "_1 = 0;\n";
load2 << buffer.getType() << " deriv" << index << "_2 = 0;\n";
recordDeriv << "local_deriv" << index << "[atom2] += deriv" << index << "_2;\n";
storeDerivs1 << "derivBuffers" << index << "[offset1] += deriv" << index << "_1;\n";
storeDerivs2 << "derivBuffers" << index << "[offset2] += local_deriv" << index << "[get_local_id(0)];\n";
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
replacements["RECORD_DERIVATIVE_2"] = recordDeriv.str();
replacements["STORE_DERIVATIVES_1"] = storeDerivs1.str();
replacements["STORE_DERIVATIVES_2"] = storeDerivs2.str();
map<string, string> defines;
if (cl.getNonbondedUtilities().getForceBufferPerAtomBlock())
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
......@@ -1520,7 +1568,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
{
// Create the kernel to reduce the derivatives and calculate per-particle energy terms.
stringstream source, extraArgs;
stringstream compute, extraArgs, reduce;
map<string, Lepton::ParsedExpression> energyExpressions;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
......@@ -1533,6 +1582,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << valueName;
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index;
reduce << "REDUCE_VALUE(derivBuffers" << index << ", " << buffer.getType() << ")\n";
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
}
map<string, string> variables;
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
......@@ -1540,7 +1596,6 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
map<string, Lepton::ParsedExpression> energyExpressions;
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
......@@ -1548,15 +1603,18 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (type != CustomGBForce::SingleParticle)
continue;
energyExpressions["/*"+intToString(i+1)+"*/ energy += "] = Lepton::Parser::parse(expression, functions).optimize();
for (int j = 0; j < force.getNumComputedValues(); j++)
energyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j)+" += "] = energyDerivExpressions[i][j];
}
compute << OpenCLExpressionUtilities::createExpressions(energyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
string index = intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
}
source << OpenCLExpressionUtilities::createExpressions(energyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// string valueName = "values"+intToString(i+1);
// source << "global_" << valueName << "[index] = local_" << valueName << ";\n";
// }
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_ENERGY"] = source.str();
replacements["REDUCE_DERIVATIVES"] = reduce.str();
replacements["COMPUTE_ENERGY"] = compute.str();
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(cl.loadSourceFromFile("customGBEnergyPerParticle.cl", replacements), defines);
......@@ -1644,6 +1702,11 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
......@@ -1659,6 +1722,8 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
perParticleEnergyKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, energyDerivs->getBuffers()[i].getBuffer());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
......@@ -1677,14 +1742,18 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
globals->upload(globalParamValues);
}
cl.clearBuffer(*valueBuffers);
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
cl.clearBuffer(buffer.getBuffer(), buffer.getSize()*energyDerivs->getNumObjects()/sizeof(cl_float));
}
cl.executeKernel(pairValueKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(perParticleValueKernel, cl.getPaddedNumAtoms());
cl.executeKernel(pairEnergyKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(perParticleEnergyKernel, cl.getPaddedNumAtoms());
// vector<vector<cl_float> > values;
// computedValues->getParameterValues(values);
// energyDerivs->getParameterValues(values);
// for (int i = 0; i < cl.getNumAtoms(); i++)
// printf("%d: %f\n", i, values[i][1]);
// printf("%d: %f %f\n", i, values[i][0], values[i][1]);
}
double OpenCLCalcCustomGBForceKernel::executeEnergy(ContextImpl& context) {
......
......@@ -476,7 +476,8 @@ private:
class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public:
OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cl(cl), params(NULL), globals(NULL), valueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) {
hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), globals(NULL), valueBuffers(NULL),
tabulatedFunctionParams(NULL), system(system) {
}
~OpenCLCalcCustomGBForceKernel();
/**
......@@ -504,6 +505,7 @@ private:
OpenCLContext& cl;
OpenCLParameterSet* params;
OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs;
OpenCLArray<cl_float>* globals;
OpenCLArray<cl_float>* valueBuffers;
OpenCLArray<mm_float4>* tabulatedFunctionParams;
......
......@@ -63,6 +63,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
COMPUTE_INTERACTION
dEdR /= -r;
}
energy += 0.5f*tempEnergy;
delta.xyz *= dEdR;
......@@ -74,11 +75,12 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
unsigned int offset1 = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else
unsigned int offset = x + tgx + warp*PADDED_NUM_ATOMS;
unsigned int offset1 = x + tgx + warp*PADDED_NUM_ATOMS;
#endif
forceBuffers[offset].xyz += force.xyz;
forceBuffers[offset1].xyz += force.xyz;
STORE_DERIVATIVES_1
}
else {
// This is an off-diagonal tile.
......@@ -91,52 +93,8 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
local_force[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos];
if (!hasExclusions && flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
bool isExcluded = false;
int atom2 = tbx+j;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float dEdR = 0.0f;
float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
}
energy += tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)].xyz = delta.xyz;
// Sum the forces on atom2.
if (tgx % 2 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
if (tgx % 4 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
if (tgx % 8 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
if (tgx % 16 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
if (tgx == 0)
local_force[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
}
}
}
if (!hasExclusions && flags == 0) {
// No interactions in this tile.
}
else
#endif
......@@ -170,11 +128,14 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
dEdR /= -r;
RECORD_DERIVATIVE_2
}
energy += tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
local_force[tbx+tj].xyz += delta.xyz;
atom2 = tbx+tj;
local_force[atom2].xyz += delta.xyz;
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
......@@ -192,6 +153,8 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#endif
forceBuffers[offset1].xyz += force.xyz;
forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz;
STORE_DERIVATIVES_1
STORE_DERIVATIVES_2
lasty = y;
}
pos++;
......
#define REDUCE_VALUE(NAME, TYPE) \
TYPE sum = NAME[index]; \
for (int i = index+bufferSize; i < totalSize; i += bufferSize) \
sum += NAME[i]; \
NAME[index] = sum;
/**
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/
......@@ -9,10 +15,8 @@ __kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global
while (index < NUM_ATOMS) {
// Reduce the derivatives
// int totalSize = bufferSize*numBuffers;
// float sum = valueBuffers[index];
// for (int i = index+bufferSize; i < totalSize; i += bufferSize)
// sum += valueBuffers[i];
int totalSize = bufferSize*numBuffers;
REDUCE_DERIVATIVES
// Now calculate the per-particle energy terms.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment