"platforms/vscode:/vscode.git/clone" did not exist on "861a61504e511fea92bf9397d9eeafcc488f22bf"
Commit 9ed2c870 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing OpenCL implementation of CustomGBForce

parent a966a981
......@@ -1279,6 +1279,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<mm_float4> tabulatedFunctionParamsVec(force.getNumFunctions());
stringstream tableArgs;
for (int i = 0; i < force.getNumFunctions(); i++) {
string name;
vector<double> values;
......@@ -1293,11 +1294,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
tabulatedFunctions.push_back(new OpenCLArray<mm_float4>(cl, values.size()-1, "TabulatedFunction"));
tabulatedFunctions[tabulatedFunctions.size()-1]->upload(f);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(arrayName, "float4", sizeof(cl_float4), tabulatedFunctions[tabulatedFunctions.size()-1]->getDeviceBuffer()));
tableArgs << ", __global float4* arrayName";
}
if (force.getNumFunctions() > 0) {
tabulatedFunctionParams = new OpenCLArray<mm_float4>(cl, tabulatedFunctionParamsVec.size(), "tabulatedFunctionParameters", false, CL_MEM_READ_ONLY);
tabulatedFunctionParams->upload(tabulatedFunctionParamsVec);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"functionParams", "float4", sizeof(cl_float4), tabulatedFunctionParams->getDeviceBuffer()));
tableArgs << ", __constant float4* " << prefix << "functionParams";
}
// Record information for the expressions.
......@@ -1366,7 +1369,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
load2 << buffer.getType() << " " << paramName << "2 = local_" << paramName << "[atom2];\n";
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str();
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
......@@ -1426,12 +1429,138 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
reductionSource << "global_" << valueName << "[index] = local_" << valueName << ";\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str();
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_VALUES"] = reductionSource.str();
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(cl.loadSourceFromFile("customGBValueReduction.cl", replacements), defines);
reduceValueKernel = cl::Kernel(program, "reduceGBValue");
cl::Program program = cl.createProgram(cl.loadSourceFromFile("customGBValuePerParticle.cl", replacements), defines);
perParticleValueKernel = cl::Kernel(program, "computePerParticleValues");
}
{
// Create the N2 energy kernel.
map<string, string> variables;
variables["r"] = "r";
for (int i = 0; i < force.getNumPerParticleParameters(); i++) {
const string& name = force.getPerParticleParameterName(i);
variables[name+"1"] = "params"+params->getParameterSuffix(i, "1");
variables[name+"2"] = "params"+params->getParameterSuffix(i, "2");
}
for (int i = 0; i < force.getNumComputedValues(); i++) {
variables[computedValueNames[i]+"1"] = "values"+computedValues->getParameterSuffix(i, "1");
variables[computedValueNames[i]+"2"] = "values"+computedValues->getParameterSuffix(i, "2");
}
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
map<string, Lepton::ParsedExpression> n2EnergyExpressions;
stringstream n2ForceSource;
bool anyExclusions = false;
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
if (type == CustomGBForce::SingleParticle)
continue;
bool exclude = (type == CustomGBForce::ParticlePair);
anyExclusions != exclude;
string excludePrefix = (exclude ? "if (!isExcluded) " : "");
n2EnergyExpressions[excludePrefix+"tempEnergy += "] = Lepton::Parser::parse(expression, functions).optimize();
n2EnergyExpressions[excludePrefix+"dEdR += "] = Lepton::Parser::parse(expression, functions).differentiate("r").optimize();
n2ForceSource << OpenCLExpressionUtilities::createExpressions(n2EnergyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
}
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = n2ForceSource.str();
stringstream extraArgs, loadLocal1, loadLocal2, load1, load2;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << paramName << ", __local " << buffer.getType() << "* local_" << paramName;
loadLocal1 << "local_" << paramName << "[get_local_id(0)] = " << paramName << "1;\n";
loadLocal2 << "local_" << paramName << "[get_local_id(0)] = global_" << paramName << "[j];\n";
load1 << buffer.getType() << " " << paramName << "1 = global_" << paramName << "[atom1];\n";
load2 << buffer.getType() << " " << paramName << "2 = local_" << paramName << "[atom2];\n";
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* global_" << valueName << ", __local " << buffer.getType() << "* local_" << valueName;
loadLocal1 << "local_" << valueName << "[get_local_id(0)] = " << valueName << "1;\n";
loadLocal2 << "local_" << valueName << "[get_local_id(0)] = global_" << valueName << "[j];\n";
load1 << buffer.getType() << " " << valueName << "1 = global_" << valueName << "[atom1];\n";
load2 << buffer.getType() << " " << valueName << "2 = local_" << valueName << "[atom2];\n";
}
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_1"] = loadLocal1.str();
replacements["LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"] = loadLocal2.str();
replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();
replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();
map<string, string> defines;
if (cl.getNonbondedUtilities().getForceBufferPerAtomBlock())
defines["USE_OUTPUT_BUFFER_PER_BLOCK"] = "1";
if (useCutoff)
defines["USE_CUTOFF"] = "1";
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
if (anyExclusions)
defines["USE_EXCLUSIONS"] = "1";
Vec3 boxVectors[3];
system.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
defines["PERIODIC_BOX_SIZE_X"] = doubleToString(boxVectors[0][0]);
defines["PERIODIC_BOX_SIZE_Y"] = doubleToString(boxVectors[1][1]);
defines["PERIODIC_BOX_SIZE_Z"] = doubleToString(boxVectors[2][2]);
defines["CUTOFF_SQUARED"] = doubleToString(force.getCutoffDistance()*force.getCutoffDistance());
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = intToString(cl.getPaddedNumAtoms());
string filename = (cl.getSIMDWidth() == 32 ? "customGBEnergyN2_nvidia.cl" : "customGBEnergyN2_default.cl");
cl::Program program = cl.createProgram(cl.loadSourceFromFile(filename, replacements), defines);
pairEnergyKernel = cl::Kernel(program, "computeN2Energy");
}
{
// Create the kernel to reduce the derivatives and calculate per-particle energy terms.
stringstream source, extraArgs;
if (force.getNumGlobalParameters() > 0)
extraArgs << ", __constant float* globals";
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
string paramName = "params"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << paramName;
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
string valueName = "values"+intToString(i+1);
extraArgs << ", __global " << buffer.getType() << "* " << valueName;
}
map<string, string> variables;
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
variables[force.getPerParticleParameterName(i)] = "params"+params->getParameterSuffix(i, "[index]");
for (int i = 0; i < force.getNumGlobalParameters(); i++)
variables[force.getGlobalParameterName(i)] = "globals["+intToString(i)+"]";
for (int i = 0; i < force.getNumComputedValues(); i++)
variables[computedValueNames[i]] = "values"+computedValues->getParameterSuffix(i, "[index]");
map<string, Lepton::ParsedExpression> energyExpressions;
for (int i = 0; i < force.getNumEnergyTerms(); i++) {
string expression;
CustomGBForce::ComputationType type;
force.getEnergyTermParameters(i, expression, type);
if (type != CustomGBForce::SingleParticle)
continue;
energyExpressions["/*"+intToString(i+1)+"*/ energy += "] = Lepton::Parser::parse(expression, functions).optimize();
}
source << OpenCLExpressionUtilities::createExpressions(energyExpressions, variables, functionDefinitions, "temp", prefix+"functionParams");
// for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
// string valueName = "values"+intToString(i+1);
// source << "global_" << valueName << "[index] = local_" << valueName << ";\n";
// }
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = extraArgs.str()+tableArgs.str();
replacements["COMPUTE_ENERGY"] = source.str();
map<string, string> defines;
defines["NUM_ATOMS"] = intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(cl.loadSourceFromFile("customGBEnergyPerParticle.cl", replacements), defines);
perParticleEnergyKernel = cl::Kernel(program, "computePerParticleEnergy");
}
if (globals != NULL) {
globals->upload(globalParamValues);
......@@ -1467,16 +1596,74 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
pairValueKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairValueKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairValueKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, tabulatedFunctionParams->getDeviceBuffer());
}
index = 0;
reduceValueKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
reduceValueKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
reduceValueKernel.setArg<cl::Buffer>(index++, valueBuffers->getDeviceBuffer());
perParticleValueKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleValueKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleValueKernel.setArg<cl::Buffer>(index++, valueBuffers->getDeviceBuffer());
if (globals != NULL)
reduceValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
reduceValueKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
reduceValueKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleValueKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
perParticleValueKernel.setArg<cl::Buffer>(index++, tabulatedFunctionParams->getDeviceBuffer());
}
index = 0;
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getForceBuffers().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
if (nb.getUseCutoff()) {
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionFlags().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
}
else {
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl_uint>(index++, nb.getTiles().getSize());
}
if (globals != NULL)
pairEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++) {
const OpenCLNonbondedUtilities::ParameterInfo& buffer = computedValues->getBuffers()[i];
pairEnergyKernel.setArg<cl::Buffer>(index++, buffer.getBuffer());
pairEnergyKernel.setArg(index++, OpenCLContext::ThreadBlockSize*buffer.getSize(), NULL);
}
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
pairEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctionParams->getDeviceBuffer());
}
index = 0;
perParticleEnergyKernel.setArg<cl_int>(index++, cl.getPaddedNumAtoms());
perParticleEnergyKernel.setArg<cl_int>(index++, nb.getNumForceBuffers());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
if (globals != NULL)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, params->getBuffers()[i].getBuffer());
for (int i = 0; i < (int) computedValues->getBuffers().size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, computedValues->getBuffers()[i].getBuffer());
if (tabulatedFunctionParams != NULL) {
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
perParticleEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
perParticleEnergyKernel.setArg<cl::Buffer>(index++, tabulatedFunctionParams->getDeviceBuffer());
}
}
if (globals != NULL) {
bool changed = false;
......@@ -1491,7 +1678,9 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
}
cl.clearBuffer(*valueBuffers);
cl.executeKernel(pairValueKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(reduceValueKernel, cl.getPaddedNumAtoms());
cl.executeKernel(perParticleValueKernel, cl.getPaddedNumAtoms());
cl.executeKernel(pairEnergyKernel, nb.getTiles().getSize()*OpenCLContext::TileSize);
cl.executeKernel(perParticleEnergyKernel, cl.getPaddedNumAtoms());
// vector<vector<cl_float> > values;
// computedValues->getParameterValues(values);
// for (int i = 0; i < cl.getNumAtoms(); i++)
......
......@@ -511,7 +511,7 @@ private:
std::vector<cl_float> globalParamValues;
std::vector<OpenCLArray<mm_float4>*> tabulatedFunctions;
System& system;
cl::Kernel pairValueKernel, reduceValueKernel, pairForceKernel, particleForceKernel;
cl::Kernel pairValueKernel, perParticleValueKernel, pairEnergyKernel, perParticleEnergyKernel;
};
/**
......
#define TILE_SIZE 32
/**
* Compute a force based on pair interactions.
*/
__kernel void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer, __local float4* local_force,
__global float4* posq, __local float4* local_posq, __local float4* tempBuffer, __global unsigned int* tiles,
#ifdef USE_CUTOFF
__global unsigned int* interactionFlags, __global unsigned int* interactionCount
#else
unsigned int numTiles
#endif
PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
#endif
unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
unsigned int warp = get_global_id(0)/TILE_SIZE;
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
float energy = 0.0f;
unsigned int lasty = 0xFFFFFFFF;
while (pos < end) {
// Extract the coordinates of this tile
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff)*TILE_SIZE;
bool hasExclusions = (x & 0x1);
x = (x>>17)*TILE_SIZE;
unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
unsigned int tbx = get_local_id(0) - tgx;
unsigned int atom1 = x + tgx;
float4 force = 0.0f;
float4 posq1 = posq[atom1];
LOAD_ATOM1_PARAMETERS
if (x == y) {
// This tile is on the diagonal.
local_posq[get_local_id(0)] = posq1;
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned int xi = x/TILE_SIZE;
unsigned int tile = xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*(xi+1)/2;
#ifdef USE_EXCLUSIONS
unsigned int excl = exclusions[exclusionIndices[tile]+tgx];
#endif
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+j;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float dEdR = 0.0f;
float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
COMPUTE_INTERACTION
}
energy += 0.5f*tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset = x + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else
unsigned int offset = x + tgx + warp*PADDED_NUM_ATOMS;
#endif
forceBuffers[offset].xyz += force.xyz;
}
else {
// This is an off-diagonal tile.
if (lasty != y) {
unsigned int j = y + tgx;
local_posq[get_local_id(0)] = posq[j];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
local_force[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
unsigned int flags = interactionFlags[pos];
if (!hasExclusions && flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
bool isExcluded = false;
int atom2 = tbx+j;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
LOAD_ATOM2_PARAMETERS
atom2 = y+j;
float dEdR = 0.0f;
float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
}
energy += tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
tempBuffer[get_local_id(0)].xyz = delta.xyz;
// Sum the forces on atom2.
if (tgx % 2 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+1];
if (tgx % 4 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+2];
if (tgx % 8 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+4];
if (tgx % 16 == 0)
tempBuffer[get_local_id(0)] += tempBuffer[get_local_id(0)+8];
if (tgx == 0)
local_force[tbx+j] += tempBuffer[get_local_id(0)] + tempBuffer[get_local_id(0)+16];
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
unsigned int xi = x/TILE_SIZE;
unsigned int yi = y/TILE_SIZE;
unsigned int tile = xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*(yi+1)/2;
#ifdef USE_EXCLUSIONS
unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF);
excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
#endif
unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
bool isExcluded = !(excl & 0x1);
#endif
int atom2 = tbx+tj;
float4 posq2 = local_posq[atom2];
float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x/PERIODIC_BOX_SIZE_X+0.5f)*PERIODIC_BOX_SIZE_X;
delta.y -= floor(delta.y/PERIODIC_BOX_SIZE_Y+0.5f)*PERIODIC_BOX_SIZE_Y;
delta.z -= floor(delta.z/PERIODIC_BOX_SIZE_Z+0.5f)*PERIODIC_BOX_SIZE_Z;
#endif
float r = sqrt(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
LOAD_ATOM2_PARAMETERS
atom2 = y+tj;
float dEdR = 0.0f;
float tempEnergy = 0.0f;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
COMPUTE_INTERACTION
}
energy += tempEnergy;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
local_force[tbx+tj].xyz += delta.xyz;
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
}
}
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
unsigned int offset1 = x + tgx + (y/TILE_SIZE)*PADDED_NUM_ATOMS;
unsigned int offset2 = y + tgx + (x/TILE_SIZE)*PADDED_NUM_ATOMS;
#else
unsigned int offset1 = x + tgx + warp*PADDED_NUM_ATOMS;
unsigned int offset2 = y + tgx + warp*PADDED_NUM_ATOMS;
#endif
forceBuffers[offset1].xyz += force.xyz;
forceBuffers[offset2].xyz += local_force[get_local_id(0)].xyz;
lasty = y;
}
pos++;
}
energyBuffer[get_global_id(0)] += energy;
}
/**
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/
__kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global float* energyBuffer
PARAMETER_ARGUMENTS) {
float energy = 0.0f;
unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) {
// Reduce the derivatives
// int totalSize = bufferSize*numBuffers;
// float sum = valueBuffers[index];
// for (int i = index+bufferSize; i < totalSize; i += bufferSize)
// sum += valueBuffers[i];
// Now calculate the per-particle energy terms.
COMPUTE_ENERGY
index += get_global_size(0);
}
energyBuffer[get_global_id(0)] += energy;
}
......@@ -2,7 +2,7 @@
* Reduce a pairwise computed value, and compute per-particle values.
*/
__kernel void reduceGBValue(int bufferSize, int numBuffers, __global float* valueBuffers
__kernel void computePerParticleValues(int bufferSize, int numBuffers, __global float* valueBuffers
PARAMETER_ARGUMENTS) {
unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) {
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the OpenCL implementation of CustomGBForce.
*/
#include "../../../tests/AssertionUtilities.h"
#include "../src/sfmt/SFMT.h"
#include "openmm/Context.h"
#include "OpenCLPlatform.h"
#include "openmm/CustomGBForce.h"
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testOBC(GBSAOBCForce::NonbondedMethod obcMethod, CustomGBForce::NonbondedMethod customMethod) {
const int numMolecules = 70;
const int numParticles = numMolecules*2;
const double boxSize = 10.0;
OpenCLPlatform platform;
// Create two systems: one with a GBSAOBCForce, and one using a CustomGBForce to implement the same interaction.
System standardSystem;
System customSystem;
for (int i = 0; i < numParticles; i++) {
standardSystem.addParticle(1.0);
customSystem.addParticle(1.0);
}
standardSystem.setPeriodicBoxVectors(Vec3(boxSize, 0.0, 0.0), Vec3(0.0, boxSize, 0.0), Vec3(0.0, 0.0, boxSize));
customSystem.setPeriodicBoxVectors(Vec3(boxSize, 0.0, 0.0), Vec3(0.0, boxSize, 0.0), Vec3(0.0, 0.0, boxSize));
GBSAOBCForce* obc = new GBSAOBCForce();
CustomGBForce* custom = new CustomGBForce();
obc->setCutoffDistance(2.0);
custom->setCutoffDistance(2.0);
custom->addPerParticleParameter("q");
custom->addPerParticleParameter("radius");
custom->addPerParticleParameter("scale");
custom->addGlobalParameter("solventDielectric", obc->getSolventDielectric());
custom->addGlobalParameter("soluteDielectric", obc->getSoluteDielectric());
custom->addComputedValue("I", "step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(r*(1/U^2-1/L^2))+0.5*log(L/U)/r+0.25*(sr2*sr2/r)*(1/L^2-1/U^2)+C);"
"U=r+sr2;"
"C=2*(1/or1-1/L)*step(sr2-r-or1);"
"L=step(or1-D)*or1+step(D-or1)*D;"
"D=step(r-sr2)*(r-sr2)+step(sr2-r)*(sr2-r);"
"sr1 = scale1*or1; sr2 = scale2*or2;"
"or1 = radius1-0.009; or2 = radius2-0.009", CustomGBForce::ParticlePairNoExclusions);
custom->addComputedValue("B", "1/(1/or-tanh(1*psi-0.8*psi^2+4.85*psi^3)/radius);"
"psi=I*or; or=radius-0.009", CustomGBForce::SingleParticle);
custom->addEnergyTerm("28.3919551*(radius+0.14)^2*(radius/B)^6-0.5*138.935485*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle);
custom->addEnergyTerm("-138.935485*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;"
"f=sqrt(r^2+B1*B2*exp(-r^2/(4*B1*B2)))", CustomGBForce::ParticlePairNoExclusions);
vector<Vec3> positions(numParticles);
vector<Vec3> velocities(numParticles);
init_gen_rand(0);
vector<double> params(3);
for (int i = 0; i < numMolecules; i++) {
if (i < numMolecules/2) {
obc->addParticle(1.0, 0.2, 0.5);
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.5;
custom->addParticle(params);
obc->addParticle(-1.0, 0.1, 0.5);
params[0] = -1.0;
params[1] = 0.1;
custom->addParticle(params);
}
else {
obc->addParticle(1.0, 0.2, 0.8);
params[0] = 1.0;
params[1] = 0.2;
params[2] = 0.8;
custom->addParticle(params);
obc->addParticle(-1.0, 0.1, 0.8);
params[0] = -1.0;
params[1] = 0.1;
custom->addParticle(params);
}
positions[2*i] = Vec3(boxSize*genrand_real2(), boxSize*genrand_real2(), boxSize*genrand_real2());
positions[2*i+1] = Vec3(positions[2*i][0]+1.0, positions[2*i][1], positions[2*i][2]);
velocities[2*i] = Vec3(genrand_real2(), genrand_real2(), genrand_real2());
velocities[2*i+1] = Vec3(genrand_real2(), genrand_real2(), genrand_real2());
}
obc->setNonbondedMethod(obcMethod);
custom->setNonbondedMethod(customMethod);
standardSystem.addForce(obc);
customSystem.addForce(custom);
VerletIntegrator integrator1(0.01);
VerletIntegrator integrator2(0.01);
Context context1(standardSystem, integrator1, platform);
context1.setPositions(positions);
context1.setVelocities(velocities);
State state1 = context1.getState(State::Forces | State::Energy);
Context context2(customSystem, integrator2, platform);
context2.setPositions(positions);
context2.setVelocities(velocities);
State state2 = context2.getState(State::Forces | State::Energy);
ASSERT_EQUAL_TOL(state1.getPotentialEnergy(), state2.getPotentialEnergy(), 1e-4);
for (int i = 0; i < numParticles; i++) {
ASSERT_EQUAL_VEC(state1.getForces()[i], state2.getForces()[i], 1e-4);
}
}
void testTabulatedFunction(bool interpolating) {
OpenCLPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
VerletIntegrator integrator(0.01);
CustomGBForce* force = new CustomGBForce();
force->addComputedValue("a", "0", CustomGBForce::ParticlePair);
force->addEnergyTerm("fn(r)+1", CustomGBForce::ParticlePair);
force->addParticle(vector<double>());
force->addParticle(vector<double>());
vector<double> table;
for (int i = 0; i < 21; i++)
table.push_back(std::sin(0.25*i));
force->addFunction("fn", table, 1.0, 6.0, interpolating);
system.addForce(force);
Context context(system, integrator, platform);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
for (int i = 1; i < 30; i++) {
double x = (7.0/30.0)*i;
positions[1] = Vec3(x, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces();
double force = (x < 1.0 || x > 6.0 ? 0.0 : -std::cos(x-1.0));
double energy = (x < 1.0 || x > 6.0 ? 0.0 : std::sin(x-1.0))+1.0;
ASSERT_EQUAL_VEC(Vec3(-force, 0, 0), forces[0], 0.1);
ASSERT_EQUAL_VEC(Vec3(force, 0, 0), forces[1], 0.1);
ASSERT_EQUAL_TOL(energy, state.getPotentialEnergy(), 0.02);
}
}
int main() {
try {
testOBC(GBSAOBCForce::NoCutoff, CustomGBForce::NoCutoff);
// testOBC(GBSAOBCForce::CutoffNonPeriodic, CustomGBForce::CutoffNonPeriodic);
// testOBC(GBSAOBCForce::CutoffPeriodic, CustomGBForce::CutoffPeriodic);
// testTabulatedFunction(true);
// testTabulatedFunction(false);
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment