Commit 7eec4930 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing OpenCL implementation of CustomGBForce

parent 9ed2c870
...@@ -198,15 +198,17 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi ...@@ -198,15 +198,17 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
} }
void OpenCLContext::clearBuffer(OpenCLArray<float>& array) { void OpenCLContext::clearBuffer(OpenCLArray<float>& array) {
clearBufferKernel.setArg<cl::Buffer>(0, array.getDeviceBuffer()); clearBuffer(array.getDeviceBuffer(), array.getSize());
clearBufferKernel.setArg<cl_int>(1, array.getSize());
executeKernel(clearBufferKernel, array.getSize());
} }
void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) { void OpenCLContext::clearBuffer(OpenCLArray<mm_float4>& array) {
clearBufferKernel.setArg<cl::Buffer>(0, array.getDeviceBuffer()); clearBuffer(array.getDeviceBuffer(), array.getSize()*4);
clearBufferKernel.setArg<cl_int>(1, array.getSize()*4); }
executeKernel(clearBufferKernel, array.getSize()*4);
void OpenCLContext::clearBuffer(cl::Buffer& buffer, int size) {
clearBufferKernel.setArg<cl::Buffer>(0, buffer);
clearBufferKernel.setArg<cl_int>(1, size);
executeKernel(clearBufferKernel, size);
} }
void OpenCLContext::reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers) { void OpenCLContext::reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers) {
......
...@@ -188,6 +188,13 @@ public: ...@@ -188,6 +188,13 @@ public:
* Set all elements of an array to 0. * Set all elements of an array to 0.
*/ */
void clearBuffer(OpenCLArray<mm_float4>& array); void clearBuffer(OpenCLArray<mm_float4>& array);
/**
* Set all elements of an array to 0.
*
* @param buffer the Buffer to clear
* @param size the number of float elements in the buffer
*/
void clearBuffer(cl::Buffer& buffer, int size);
/** /**
* Given a collection of buffers packed into an array, sum them and store * Given a collection of buffers packed into an array, sum them and store
* the sum in the first buffer. * the sum in the first buffer.
......
...@@ -1219,8 +1219,12 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() { ...@@ -1219,8 +1219,12 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
delete params; delete params;
if (computedValues != NULL) if (computedValues != NULL)
delete computedValues; delete computedValues;
if (energyDerivs != NULL)
delete energyDerivs;
if (globals != NULL) if (globals != NULL)
delete globals; delete globals;
if (valueBuffers != NULL)
delete valueBuffers;
if (tabulatedFunctionParams != NULL) if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams; delete tabulatedFunctionParams;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
...@@ -1303,7 +1307,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1303,7 +1307,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
tableArgs << ", __constant float4* " << prefix << "functionParams"; tableArgs << ", __constant float4* " << prefix << "functionParams";
} }
// Record information for the expressions. // Record the global parameters.
vector<string> paramNames; vector<string> paramNames;
for (int i = 0; i < force.getNumPerParticleParameters(); i++) for (int i = 0; i < force.getNumPerParticleParameters(); i++)
...@@ -1316,16 +1320,39 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1316,16 +1320,39 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
} }
if (globals != NULL) if (globals != NULL)
globals->upload(globalParamValues); globals->upload(globalParamValues);
bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic); // Record derivatives of expressions needed for the chain rule terms.
// Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
// Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r").optimize(); vector<Lepton::ParsedExpression> valueDerivExpressions;
// map<string, Lepton::ParsedExpression> forceExpressions; vector<vector<Lepton::ParsedExpression> > energyDerivExpressions;
// forceExpressions["tempEnergy += "] = energyExpression; for (int i = 0; i < force.getNumComputedValues(); i++) {
// forceExpressions["tempForce -= "] = forceExpression; Lepton::ParsedExpression ex = Lepton::Parser::parse(computedValueExpressions[i], functions).optimize();
// if (i == 0)
valueDerivExpressions.push_back(ex.differentiate("r").optimize());
else
valueDerivExpressions.push_back(ex.differentiate(computedValueNames[i-1]).optimize());
}
energyDerivExpressions.resize(force.getNumEnergyTerms());
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
Lepton::ParsedExpression ex = Lepton::Parser::parse(expression, functions).optimize();
for (int j = 0; j < force.getNumComputedValues(); j++) {
if (type == CustomGBForce::SingleParticle)
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]).optimize());
else {
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"1").optimize());
energyDerivExpressions[i].push_back(ex.differentiate(computedValueNames[j]+"2").optimize());
}
}
}
energyDerivs = new OpenCLParameterSet(cl, force.getNumComputedValues(), cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), "customGBEnergyDerivatives");
// Create the kernels. // Create the kernels.
bool useCutoff = (force.getNonbondedMethod() != CustomGBForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != CustomGBForce::NoCutoff && force.getNonbondedMethod() != CustomGBForce::CutoffNonPeriodic);
{ {
// Create the N2 value kernel. // Create the N2 value kernel.
...@@ -1453,7 +1480,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1453,7 +1480,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for (int i = 0; i < force.getNumGlobalParameters(); i++) for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]"; variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
map<string, Lepton::ParsedExpression> n2EnergyExpressions; map<string, Lepton::ParsedExpression> n2EnergyExpressions;
stringstream n2ForceSource; stringstream n2EnergySource;
bool anyExclusions = false; bool anyExclusions = false;
for (int i = 0; i < force.getNumEnergyTerms(); i++) { for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression; string expression;
...@@ -1462,15 +1489,22 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1462,15 +1489,22 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (type == CustomGBForce::SingleParticle) if (type == CustomGBForce::SingleParticle)
continue; continue;
bool exclude = (type == CustomGBForce::ParticlePair); bool exclude = (type == CustomGBForce::ParticlePair);
anyExclusions != exclude; anyExclusions |= exclude;
string excludePrefix = (exclude ? "if (!isExcluded) " : ""); n2EnergyExpressions["tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize();
n2EnergyExpressions[excludePrefix+"tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize(); n2EnergyExpressions["dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize();
n2EnergyExpressions[excludePrefix+"dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize(); for (int j = 0; j < force.getNumComputedValues(); j++) {
n2ForceSource << OpenCLExpressionUtilities::createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams"); n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_1")+" += "] = energyDerivExpressions[i][2*j];
n2EnergyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j, "_2")+" += "] = energyDerivExpressions[i][2*j+1];
}
if (exclude)
n2EnergySource << "if (!isExcluded) {\n";
n2EnergySource << OpenCLExpressionUtilities::createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
if (exclude)
n2EnergySource << "}\n";
} }
map<string, string> replacements; map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = n2ForceSource.str(); replacements["COMPUTE_INTERACTION"] = n2EnergySource.str();
stringstream extraArgs, loadLocal1, loadLocal2, load1, load2; stringstream extraArgs, loadLocal1, loadLocal2, load1, load2, recordDeriv, storeDerivs1, storeDerivs2;
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals"; extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
...@@ -1491,11 +1525,25 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1491,11 +1525,25 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n"; load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n";
load2 << buffer.getType() << " " << valueName << "2 = local_" << valueName << "[atom2];\n"; load2 << buffer.getType() << " " << valueName << "2 = local_" << valueName << "[atom2];\n";
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index << ", __local " << buffer.getType() << "* local_deriv" << index;
loadLocal2 << "local_deriv" << index << "[get_local_id(0)] = 0.0f;\n";
load1 << buffer.getType() << " deriv" << index << "_1 = 0;\n";
load2 << buffer.getType() << " deriv" << index << "_2 = 0;\n";
recordDeriv << "local_deriv" << index << "[atom2] += deriv" << index << "_2;\n";
storeDerivs1 << "derivBuffers" << index << "[offset1] += deriv" << index << "_1;\n";
storeDerivs2 << "derivBuffers" << index << "[offset2] += local_deriv" << index << "[get_local_id(0)];\n";
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str(); replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str(); replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str(); replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
replacements["RECORD_DERIVATIVE_2"] = recordDeriv.str();
replacements["STORE_DERIVATIVES_1"] = storeDerivs1.str();
replacements["STORE_DERIVATIVES_2"] = storeDerivs2.str();
map<string, string> defines; map<string, string> defines;
if (cl.getNonbondedUtilities().getForceBufferPerAtomBlock()) if (cl.getNonbondedUtilities().getForceBufferPerAtomBlock())
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1"; defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
...@@ -1520,7 +1568,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1520,7 +1568,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
{ {
// Create the kernel to reduce the derivatives and calculate per-particle energy terms. // Create the kernel to reduce the derivatives and calculate per-particle energy terms.
stringstream source, extraArgs; stringstream compute, extraArgs, reduce;
map<string, Lepton::ParsedExpression> energyExpressions;
if (force.getNumGlobalParameters() > 0) if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals"; extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) { for (int i = 0; i < (int) params->getBuffers().size(); i++) {
...@@ -1533,6 +1582,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1533,6 +1582,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
string valueName = "values"+intToString(i+1); string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << valueName; extraArgs << ", __global " << buffer.getType() << "* " << valueName;
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
string index = intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* derivBuffers" << index;
reduce << "REDUCE_VALUE(derivBuffers" << index << ", " << buffer.getType() << ")\n";
compute << buffer.getType() << " deriv" << index << " = derivBuffers" << index << "[index];\n";
}
map<string, string> variables; map<string, string> variables;
for (int i = 0; i < force.getNumPerParticleParameters(); i++) for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]"); variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
...@@ -1540,7 +1596,6 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1540,7 +1596,6 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]"; variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++) for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]"); variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
map<string, Lepton::ParsedExpression> energyExpressions;
for (int i = 0; i < force.getNumEnergyTerms(); i++) { for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression; string expression;
CustomGBForce::ComputationType type; CustomGBForce::ComputationType type;
...@@ -1548,15 +1603,18 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -1548,15 +1603,18 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (type != CustomGBForce::SingleParticle) if (type != CustomGBForce::SingleParticle)
continue; continue;
energyExpressions["/*"+intToString(i+1)+"*/ energy += "] = Lepton::Parser::parse(expression, functions).optimize(); energyExpressions["/*"+intToString(i+1)+"*/ energy += "] = Lepton::Parser::parse(expression, functions).optimize();
for (int j = 0; j < force.getNumComputedValues(); j++)
energyExpressions["/*"+intToString(i+1)+"*/ deriv"+energyDerivs->getParameterSuffix(j)+" += "] = energyDerivExpressions[i][j];
}
compute << OpenCLExpressionUtilities::createExpressions(energyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
string index = intToString(i+1);
compute << "derivBuffers" << index << "[index] = deriv" << index << ";\n";
} }
source << OpenCLExpressionUtilities::createExpressions(energyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// string valueName = "values"+intToString(i+1);
// source << "global_" << valueName << "[index] = local_" << valueName << ";\n";
// }
map<string, string> replacements; map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str(); replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_ENERGY"] = source.str(); replacements["REDUCE_DERIVATIVES"] = reduce.str();
replacements["COMPUTE_ENERGY"] = compute.str();
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(cl.loadSourceFromFile("customGBEnergyPerParticle.cl", replacements), defines); cl::Program program = cl.createProgram(cl.loadSourceFromFile("customGBEnergyPerParticle.cl", replacements), defines);
...@@ -1644,6 +1702,11 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) { ...@@ -1644,6 +1702,11 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL); pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
} }
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
...@@ -1659,6 +1722,8 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) { ...@@ -1659,6 +1722,8 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
perParticleEnergyKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer());
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, energyDerivs->getBuffers()[i].getBuffer());
if (tabulatedFunctionParams != NULL) { if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer()); perParticleEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
...@@ -1677,14 +1742,18 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) { ...@@ -1677,14 +1742,18 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
globals->upload(globalParamValues); globals->upload(globalParamValues);
} }
cl.clearBuffer(*valueBuffers); cl.clearBuffer(*valueBuffers);
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = energyDerivs->getBuffers()[i];
cl.clearBuffer(buffer.getBuffer(), buffer.getSize()*energyDerivs->getNumObjects()/sizeof(cl_float));
}
cl.executeKernel(pairValueKernel, nb.getTiles().getSize()*OpenCLContext::TileSize); cl.executeKernel(pairValueKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(perParticleValueKernel, cl.getPaddedNumAtoms()); cl.executeKernel(perParticleValueKernel, cl.getPaddedNumAtoms());
cl.executeKernel(pairEnergyKernel, nb.getTiles().getSize()*OpenCLContext::TileSize); cl.executeKernel(pairEnergyKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(perParticleEnergyKernel, cl.getPaddedNumAtoms()); cl.executeKernel(perParticleEnergyKernel, cl.getPaddedNumAtoms());
// vector<vector<cl_float> > values; // vector<vector<cl_float> > values;
// computedValues->getParameterValues(values); // energyDerivs->getParameterValues(values);
// for (int i = 0; i < cl.getNumAtoms(); i++) // for (int i = 0; i < cl.getNumAtoms(); i++)
// printf("%d: %f\n", i, values[i][1]); // printf("%d: %f %f\n", i, values[i][0], values[i][1]);
} }
double OpenCLCalcCustomGBForceKernel::executeEnergy(ContextImpl& context) { double OpenCLCalcCustomGBForceKernel::executeEnergy(ContextImpl& context) {
......
...@@ -476,7 +476,8 @@ private: ...@@ -476,7 +476,8 @@ private:
class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel { class OpenCLCalcCustomGBForceKernel : public CalcCustomGBForceKernel {
public: public:
OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcCustomGBForceKernel(name, platform), OpenCLCalcCustomGBForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, System& system) : CalcCustomGBForceKernel(name, platform),
hasInitializedKernels(false), cl(cl), params(NULL), globals(NULL), valueBuffers(NULL), tabulatedFunctionParams(NULL), system(system) { hasInitializedKernels(false), cl(cl), params(NULL), computedValues(NULL), energyDerivs(NULL), globals(NULL), valueBuffers(NULL),
tabulatedFunctionParams(NULL), system(system) {
} }
~OpenCLCalcCustomGBForceKernel(); ~OpenCLCalcCustomGBForceKernel();
/** /**
...@@ -504,6 +505,7 @@ private: ...@@ -504,6 +505,7 @@ private:
OpenCLContext& cl; OpenCLContext& cl;
OpenCLParameterSet* params; OpenCLParameterSet* params;
OpenCLParameterSet* computedValues; OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs;
OpenCLArray<cl_float>* globals; OpenCLArray<cl_float>* globals;
OpenCLArray<cl_float>* valueBuffers; OpenCLArray<cl_float>* valueBuffers;
OpenCLArray<mm_float4>* tabulatedFunctionParams; OpenCLArray<mm_float4>* tabulatedFunctionParams;
......
...@@ -63,6 +63,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -63,6 +63,7 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r;
} }
energy += 0.5f*tempEnergy; energy += 0.5f*tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
...@@ -74,11 +75,12 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -74,11 +75,12 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
// Write results // Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK #ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS; unsigned int offset1 = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else #else
unsigned int offset = x + tgx + warp*PADDED_NUM_ATOMS; unsigned int offset1 = x + tgx + warp*PADDED_NUM_ATOMS;
#endif #endif
forceBuffers[offset].xyz += force.xyz; forceBuffers[offset1].xyz += force.xyz;
STORE_DERIVATIVES_1
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
...@@ -91,52 +93,8 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -91,52 +93,8 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
local_force[get_local_id(0)] = 0.0f; local_force[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos]; unsigned int flags = interactionFlags[pos];
if (!hasExclusions && flags != 0xFFFFFFFF) { if (!hasExclusions && flags == 0) {
if (flags == 0) { // No interactions in this tile.
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
bool isExcluded = false;
int atom2 = tbx+j;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float dEdR = 0.0f;
float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
}
energy += tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)].xyz = delta.xyz;
// Sum the forces on atom2.
if (tgx % 2 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
if (tgx % 4 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
if (tgx % 8 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
if (tgx % 16 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
if (tgx == 0)
local_force[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
}
}
}
} }
else else
#endif #endif
...@@ -170,11 +128,14 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -170,11 +128,14 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
float tempEnergy = 0.0f; float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION COMPUTE_INTERACTION
dEdR /= -r;
RECORD_DERIVATIVE_2
} }
energy += tempEnergy; energy += tempEnergy;
delta.xyz *= dEdR; delta.xyz *= dEdR;
force.xyz -= delta.xyz; force.xyz -= delta.xyz;
local_force[tbx+tj].xyz += delta.xyz; atom2 = tbx+tj;
local_force[atom2].xyz += delta.xyz;
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
excl >>= 1; excl >>= 1;
#endif #endif
...@@ -192,6 +153,8 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene ...@@ -192,6 +153,8 @@ __kernel void computeN2Energy(__global float4* forceBuffers, __global float* ene
#endif #endif
forceBuffers[offset1].xyz += force.xyz; forceBuffers[offset1].xyz += force.xyz;
forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz; forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz;
STORE_DERIVATIVES_1
STORE_DERIVATIVES_2
lasty = y; lasty = y;
} }
pos++; pos++;
......
#define REDUCE_VALUE(NAME, TYPE) \
TYPE sum = NAME[index]; \
for (int i = index+bufferSize; i < totalSize; i += bufferSize) \
sum += NAME[i]; \
NAME[index] = sum;
/** /**
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms. * Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/ */
...@@ -9,10 +15,8 @@ __kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global ...@@ -9,10 +15,8 @@ __kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
// Reduce the derivatives // Reduce the derivatives
// int totalSize = bufferSize*numBuffers; int totalSize = bufferSize*numBuffers;
// float sum = valueBuffers[index]; REDUCE_DERIVATIVES
// for (int i = index+bufferSize; i < totalSize; i += bufferSize)
// sum += valueBuffers[i];
// Now calculate the per-particle energy terms. // Now calculate the per-particle energy terms.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment