Commit fd473eea authored by Peter Eastman's avatar Peter Eastman
Browse files

Merge branch 'master' into nucleic

parents 0a751b5b 6a985cfd
......@@ -102,6 +102,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLCalcCustomExternalForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomHbondForceKernel::Name())
return new OpenCLCalcCustomHbondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCentroidBondForceKernel::Name())
return new OpenCLCalcCustomCentroidBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomCompoundBondForceKernel::Name())
return new OpenCLCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomManyParticleForceKernel::Name())
......
......@@ -31,6 +31,7 @@
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCentroidBondForceImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
......@@ -127,7 +128,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cl.getBondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions(groups, includeForces, includeEnergy);
double sum = 0.0;
for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
......@@ -135,7 +136,7 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) {
OpenCLArray& energyArray = cl.getEnergyBuffer();
if (cl.getUseDoublePrecision()) {
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double* energy = (double*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
......@@ -1551,8 +1552,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
cl.getPosq().upload(posqf);
sigmaEpsilon->upload(sigmaEpsilonVector);
bool useCutoff = (force.getNonbondedMethod() != NonbondedForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != NonbondedForce::NoCutoff && force.getNonbondedMethod() != NonbondedForce::CutoffNonPeriodic);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
......@@ -1580,7 +1582,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
dispersionCoefficient = 0.0;
alpha = 0;
ewaldSelfEnergy = 0.0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
if (nonbondedMethod == Ewald) {
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
......@@ -1606,10 +1608,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (force.getNonbondedMethod() == NonbondedForce::PME) {
else if (nonbondedMethod == PME) {
// Compute the PME parameters.
int gridSizeX, gridSizeY, gridSizeZ;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
gridSizeY = OpenCLFFT3D::findLegalDimension(gridSizeY);
......@@ -2056,14 +2057,26 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Compute other values.
NonbondedForce::NonbondedMethod method = force.getNonbondedMethod();
if (method == NonbondedForce::Ewald || method == NonbondedForce::PME)
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0 && (method == NonbondedForce::CutoffPeriodic || method == NonbondedForce::Ewald || method == NonbondedForce::PME))
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cl.invalidateMolecules();
}
void OpenCLCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cl.getPlatformData().useCpuPme)
cpuPme.getAs<CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = this->alpha;
nx = gridSizeX;
ny = gridSizeY;
nz = gridSizeZ;
}
}
class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
......@@ -3642,7 +3655,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if (useLong) {
pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs->getDeviceBuffer());
for (int i = 0; i < numComputedValues; ++i)
pairEnergyKernel.setArg(index++, nb.getForceThreadBlockSize()*elementSize, NULL);
pairEnergyKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*elementSize, NULL);
}
else {
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
......@@ -3808,7 +3821,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction()).optimize();
map<string, Lepton::CustomFunction*> customFunctions;
customFunctions["periodicdistance"] = cl.getExpressionUtilities().getPeriodicDistancePlaceholder();
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), customFunctions).optimize();
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x").optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y").optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z").optimize();
......@@ -4433,6 +4448,442 @@ void OpenCLCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& cont
cl.invalidateMolecules();
}
class OpenCLCustomCentroidBondForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomCentroidBondForceInfo(const CustomCentroidBondForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector<int>& particles) {
vector<double> parameters;
vector<int> groups;
force.getBondParameters(index, groups, parameters);
for (int i = 0; i < groups.size(); i++) {
vector<int> groupParticles;
vector<double> weights;
force.getGroupParameters(groups[i], groupParticles, weights);
particles.insert(particles.end(), groupParticles.begin(), groupParticles.end());
}
}
bool areGroupsIdentical(int group1, int group2) {
vector<int> groups1, groups2;
vector<double> parameters1, parameters2;
force.getBondParameters(group1, groups1, parameters1);
force.getBondParameters(group2, groups2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
for (int i = 0; i < groups1.size(); i++) {
vector<int> groupParticles;
vector<double> weights1, weights2;
force.getGroupParameters(groups1[i], groupParticles, weights1);
force.getGroupParameters(groups2[i], groupParticles, weights2);
if (weights1.size() != weights2.size())
return false;
for (int j = 0; j < weights1.size(); j++)
if (weights1[j] != weights2[j])
return false;
}
return true;
}
private:
const CustomCentroidBondForce& force;
};
OpenCLCalcCustomCentroidBondForceKernel::~OpenCLCalcCustomCentroidBondForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
if (!cl.getSupports64BitGlobalAtomics())
throw OpenMMException("CustomCentroidBondForce requires a device that supports 64 bit atomic operations");
cl.addForce(new OpenCLCustomCentroidBondForceInfo(force));
// Record the groups.
numGroups = force.getNumGroups();
vector<cl_int> groupParticleVec;
vector<cl_float> groupWeightVecFloat;
vector<cl_double> groupWeightVecDouble;
vector<cl_int> groupOffsetVec;
groupOffsetVec.push_back(0);
for (int i = 0; i < numGroups; i++) {
vector<int> particles;
vector<double> weights;
force.getGroupParameters(i, particles, weights);
groupParticleVec.insert(groupParticleVec.end(), particles.begin(), particles.end());
groupOffsetVec.push_back(groupParticleVec.size());
}
vector<vector<double> > normalizedWeights;
CustomCentroidBondForceImpl::computeNormalizedWeights(force, system, normalizedWeights);
if (cl.getUseDoublePrecision()) {
for (int i = 0; i < numGroups; i++)
groupWeightVecDouble.insert(groupWeightVecDouble.end(), normalizedWeights[i].begin(), normalizedWeights[i].end());
}
else {
for (int i = 0; i < numGroups; i++)
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
groupParticles = OpenCLArray::create<int>(cl, groupParticleVec.size(), "groupParticles");
groupParticles->upload(groupParticleVec);
if (cl.getUseDoublePrecision()) {
groupWeights = OpenCLArray::create<double>(cl, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecDouble);
centerPositions = OpenCLArray::create<mm_double4>(cl, numGroups, "centerPositions");
}
else {
groupWeights = OpenCLArray::create<float>(cl, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecFloat);
centerPositions = OpenCLArray::create<mm_float4>(cl, numGroups, "centerPositions");
}
groupOffsets = OpenCLArray::create<int>(cl, groupOffsetVec.size(), "groupOffsets");
groupOffsets->upload(groupOffsetVec);
groupForces = OpenCLArray::create<long long>(cl, numGroups*3, "groupForces");
cl.addAutoclearBuffer(*groupForces);
// Record the bonds.
int groupsPerBond = force.getNumGroupsPerBond();
vector<cl_int> bondGroupVec(numBonds*groupsPerBond);
params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customCentroidBondParams");
vector<vector<float> > paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
vector<int> groups;
vector<double> parameters;
force.getBondParameters(i, groups, parameters);
for (int j = 0; j < groups.size(); j++)
bondGroupVec[i+j*numBonds] = groups[j];
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
bondGroups = OpenCLArray::create<int>(cl, bondGroupVec.size(), "bondGroups");
bondGroups->upload(bondGroupVec);
// Record the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string arrayName = "table"+cl.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction"));
tabulatedFunctions.back()->upload(f);
extraArgs << ", __global const float";
if (width > 1)
extraArgs << width;
extraArgs << "* restrict " << arrayName;
}
// Record information about parameters.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
map<string, string> variables;
for (int i = 0; i < groupsPerBond; i++) {
string index = cl.intToString(i+1);
variables["x"+index] = "pos"+index+".x";
variables["y"+index] = "pos"+index+".y";
variables["z"+index] = "pos"+index+".z";
}
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<float>(cl, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals->upload(globalParamValues);
extraArgs << ", __global const float* restrict globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cl.intToString(i)+"]";
variables[name] = value;
}
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// and dihedrals the expression depends on.
map<string, vector<int> > distances;
map<string, vector<int> > angles;
map<string, vector<int> > dihedrals;
Lepton::ParsedExpression energyExpression = CustomCentroidBondForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
map<string, Lepton::ParsedExpression> forceExpressions;
set<string> computedDeltas;
vector<string> atomNames, posNames;
for (int i = 0; i < groupsPerBond; i++) {
string index = cl.intToString(i+1);
atomNames.push_back("P"+index);
posNames.push_back("pos"+index);
}
stringstream compute;
for (int i = 0; i < groupsPerBond; i++) {
compute<<"int group"<<(i+1)<<" = bondGroups[index+"<<(i*numBonds)<<"];\n";
compute<<"real4 pos"<<(i+1)<<" = centerPositions[group"<<(i+1)<<"];\n";
}
int index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName = atomNames[groups[0]]+atomNames[groups[1]];
if (computedDeltas.count(deltaName) == 0) {
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName);
}
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
variables[iter->first] = "r_"+deltaName;
forceExpressions["real dEdDistance"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[1]]+atomNames[groups[0]];
string deltaName2 = atomNames[groups[1]]+atomNames[groups[2]];
string angleName = "angle_"+atomNames[groups[0]]+atomNames[groups[1]]+atomNames[groups[2]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[0]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[2]]<<");\n";
computedDeltas.insert(deltaName2);
}
compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
variables[iter->first] = angleName;
forceExpressions["real dEdAngle"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[0]]+atomNames[groups[1]];
string deltaName2 = atomNames[groups[2]]+atomNames[groups[1]];
string deltaName3 = atomNames[groups[2]]+atomNames[groups[3]];
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[groups[0]]+atomNames[groups[1]]+atomNames[groups[2]]+atomNames[groups[3]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName2);
}
if (computedDeltas.count(deltaName3) == 0) {
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[3]]<<");\n";
computedDeltas.insert(deltaName3);
}
compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
compute<<"real4 "<<crossName2<<" = computeCross(delta"<<deltaName2<<", delta"<<deltaName3<<");\n";
compute<<"real "<<dihedralName<<" = computeAngle("<<crossName1<<", "<<crossName2<<");\n";
compute<<dihedralName<<" *= (delta"<<deltaName1<<".x*"<<crossName2<<".x + delta"<<deltaName1<<".y*"<<crossName2<<".y + delta"<<deltaName1<<".z*"<<crossName2<<".z < 0 ? -1 : 1);\n";
variables[iter->first] = dihedralName;
forceExpressions["real dEdDihedral"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
// Now evaluate the expressions.
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArgs<<", __global const "<<buffer.getType()<<"* restrict globalParams"<<i;
compute<<buffer.getType()<<" bondParams"<<(i+1)<<" = globalParams"<<i<<"[index];\n";
}
forceExpressions["energy += "] = energyExpression;
compute << cl.getExpressionUtilities().createExpressions(forceExpressions, variables, functionList, functionDefinitions, "temp");
// Finally, apply forces to groups.
vector<string> forceNames;
for (int i = 0; i < groupsPerBond; i++) {
string istr = cl.intToString(i+1);
string forceName = "force"+istr;
forceNames.push_back(forceName);
compute<<"real3 "<<forceName<<" = (real3) 0;\n";
compute<<"{\n";
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x"+istr).optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y"+istr).optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z"+istr).optimize();
map<string, Lepton::ParsedExpression> expressions;
if (!isZeroExpression(forceExpressionX))
expressions[forceName+".x -= "] = forceExpressionX;
if (!isZeroExpression(forceExpressionY))
expressions[forceName+".y -= "] = forceExpressionY;
if (!isZeroExpression(forceExpressionZ))
expressions[forceName+".z -= "] = forceExpressionZ;
if (expressions.size() > 0)
compute<<cl.getExpressionUtilities().createExpressions(expressions, variables, functionList, functionDefinitions, "coordtemp");
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName = atomNames[groups[0]]+atomNames[groups[1]];
string value = "(dEdDistance"+cl.intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz";
compute<<forceNames[groups[0]]<<" += "<<"-"<<value<<";\n";
compute<<forceNames[groups[1]]<<" += "<<value<<";\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[1]]+atomNames[groups[0]];
string deltaName2 = atomNames[groups[1]]+atomNames[groups[2]];
compute<<"{\n";
compute<<"real4 crossProd = cross(delta"<<deltaName2<<", delta"<<deltaName1<<");\n";
compute<<"real lengthCross = max(length(crossProd), (real) 1e-6f);\n";
compute<<"real4 deltaCross0 = -cross(delta"<<deltaName1<<", crossProd)*dEdAngle"<<cl.intToString(index)<<"/(delta"<<deltaName1<<".w*lengthCross);\n";
compute<<"real4 deltaCross2 = cross(delta"<<deltaName2<<", crossProd)*dEdAngle"<<cl.intToString(index)<<"/(delta"<<deltaName2<<".w*lengthCross);\n";
compute<<"real4 deltaCross1 = -(deltaCross0+deltaCross2);\n";
compute<<forceNames[groups[0]]<<".xyz += deltaCross0.xyz;\n";
compute<<forceNames[groups[1]]<<".xyz += deltaCross1.xyz;\n";
compute<<forceNames[groups[2]]<<".xyz += deltaCross2.xyz;\n";
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[0]]+atomNames[groups[1]];
string deltaName2 = atomNames[groups[2]]+atomNames[groups[1]];
string deltaName3 = atomNames[groups[2]]+atomNames[groups[3]];
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
compute<<"{\n";
compute<<"real r = sqrt(delta"<<deltaName2<<".w);\n";
compute<<"real4 ff;\n";
compute<<"ff.x = (-dEdDihedral"<<cl.intToString(index)<<"*r)/"<<crossName1<<".w;\n";
compute<<"ff.y = (delta"<<deltaName1<<".x*delta"<<deltaName2<<".x + delta"<<deltaName1<<".y*delta"<<deltaName2<<".y + delta"<<deltaName1<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.z = (delta"<<deltaName3<<".x*delta"<<deltaName2<<".x + delta"<<deltaName3<<".y*delta"<<deltaName2<<".y + delta"<<deltaName3<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.w = (dEdDihedral"<<cl.intToString(index)<<"*r)/"<<crossName2<<".w;\n";
compute<<"real4 internalF0 = ff.x*"<<crossName1<<";\n";
compute<<"real4 internalF3 = ff.w*"<<crossName2<<";\n";
compute<<"real4 s = ff.y*internalF0 - ff.z*internalF3;\n";
compute<<forceNames[groups[0]]<<".xyz += internalF0.xyz;\n";
compute<<forceNames[groups[1]]<<".xyz += s.xyz-internalF0.xyz;\n";
compute<<forceNames[groups[2]]<<".xyz += -s.xyz-internalF3.xyz;\n";
compute<<forceNames[groups[3]]<<".xyz += internalF3.xyz;\n";
compute<<"}\n";
}
// Save the forces to global memory.
for (int i = 0; i < groupsPerBond; i++) {
compute<<"atom_add(&groupForce[group"<<(i+1)<<"], (long) (force"<<(i+1)<<".x*0x100000000));\n";
compute<<"atom_add(&groupForce[group"<<(i+1)<<"+NUM_GROUPS], (long) (force"<<(i+1)<<".y*0x100000000));\n";
compute<<"atom_add(&groupForce[group"<<(i+1)<<"+NUM_GROUPS*2], (long) (force"<<(i+1)<<".z*0x100000000));\n";
}
map<string, string> replacements;
replacements["M_PI"] = cl.doubleToString(M_PI);
replacements["NUM_GROUPS"] = cl.intToString(numGroups);
replacements["NUM_BONDS"] = cl.intToString(numBonds);
replacements["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
replacements["EXTRA_ARGS"] = extraArgs.str();
replacements["COMPUTE_FORCE"] = compute.str();
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customCentroidBond, replacements));
index = 0;
computeCentersKernel = cl::Kernel(program, "computeGroupCenters");
computeCentersKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupParticles->getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupWeights->getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupOffsets->getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, centerPositions->getDeviceBuffer());
index = 0;
groupForcesKernel = cl::Kernel(program, "computeGroupForces");
groupForcesKernel.setArg<cl::Buffer>(index++, groupForces->getDeviceBuffer());
index++; // Energy buffer hasn't been created yet
groupForcesKernel.setArg<cl::Buffer>(index++, centerPositions->getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, bondGroups->getDeviceBuffer());
for (int i = 0; i < tabulatedFunctions.size(); i++)
groupForcesKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
if (globals != NULL)
groupForcesKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
groupForcesKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
index = 0;
applyForcesKernel = cl::Kernel(program, "applyForcesToAtoms");
applyForcesKernel.setArg<cl::Buffer>(index++, groupParticles->getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupWeights->getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupOffsets->getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupForces->getDeviceBuffer());
}
double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
cl.executeKernel(computeCentersKernel, OpenCLContext::TileSize*numGroups);
groupForcesKernel.setArg<cl::Buffer>(1, cl.getEnergyBuffer().getDeviceBuffer());
cl.executeKernel(groupForcesKernel, numBonds);
applyForcesKernel.setArg<cl::Buffer>(4, cl.getLongForceBuffer().getDeviceBuffer());
cl.executeKernel(applyForcesKernel, OpenCLContext::TileSize*numGroups);
return 0.0;
}
void OpenCLCalcCustomCentroidBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force) {
if (numBonds != force.getNumBonds())
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
if (numBonds == 0)
return;
// Record the per-bond parameters.
vector<vector<float> > paramVector(numBonds);
vector<int> particles;
vector<double> parameters;
for (int i = 0; i < numBonds; i++) {
force.getBondParameters(i, particles, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules();
}
class OpenCLCustomCompoundBondForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomCompoundBondForceInfo(const CustomCompoundBondForce& force) : OpenCLForceInfo(0), force(force) {
......
......@@ -180,13 +180,29 @@ void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exc
}
static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
// This version is used on devices with SIMD width of 32 or less. It sorts tiles to improve cache efficiency.
return ((a.y < b.y) || (a.y == b.y && a.x < b.x));
}
static bool compareUshort2LargeSIMD(mm_ushort2 a, mm_ushort2 b) {
// This version is used on devices with SIMD width greater than 32. It puts diagonal tiles before off-diagonal
// ones to reduce thread divergence.
if (a.x == a.y) {
if (b.x == b.y)
return (a.x < b.x);
return true;
}
if (b.x == b.y)
return false;
return ((a.y < b.y) || (a.y == b.y && a.x < b.x));
}
void OpenCLNonbondedUtilities::initialize(const System& system) {
if (atomExclusions.size() == 0) {
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
atomExclusions.resize(context.getNumAtoms());
for (int i = 0; i < (int) atomExclusions.size(); i++)
atomExclusions[i].push_back(i);
......@@ -199,7 +215,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
setAtomBlockRange(context.getContextIndex()/(double) numContexts, (context.getContextIndex()+1)/(double) numContexts);
// Build a list of tiles that contain exclusions.
set<pair<int, int> > tilesWithExclusions;
for (int atom1 = 0; atom1 < (int) atomExclusions.size(); ++atom1) {
int x = atom1/OpenCLContext::TileSize;
......@@ -212,7 +228,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
vector<mm_ushort2> exclusionTilesVec;
for (set<pair<int, int> >::const_iterator iter = tilesWithExclusions.begin(); iter != tilesWithExclusions.end(); ++iter)
exclusionTilesVec.push_back(mm_ushort2((unsigned short) iter->first, (unsigned short) iter->second));
sort(exclusionTilesVec.begin(), exclusionTilesVec.end(), compareUshort2);
sort(exclusionTilesVec.begin(), exclusionTilesVec.end(), context.getSIMDWidth() <= 32 ? compareUshort2 : compareUshort2LargeSIMD);
exclusionTiles = OpenCLArray::create<mm_ushort2>(context, exclusionTilesVec.size(), "exclusionTiles");
exclusionTiles->upload(exclusionTilesVec);
map<pair<int, int>, int> exclusionTileMap;
......@@ -341,37 +357,43 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if (lastCutoff != kernels.cutoffDistance)
forceRebuildNeighborList = true;
setPeriodicBoxArgs(context, kernels.findBlockBoundsKernel, 1);
context.executeKernel(kernels.findBlockBoundsKernel, context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
kernels.sortBoxDataKernel.setArg<cl_int>(9, forceRebuildNeighborList);
context.executeKernel(kernels.sortBoxDataKernel, context.getNumAtoms());
setPeriodicBoxArgs(context, kernels.findInteractingBlocksKernel, 0);
context.executeKernel(kernels.findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
forceRebuildNeighborList = false;
bool rebuild = false;
do {
setPeriodicBoxArgs(context, kernels.findBlockBoundsKernel, 1);
context.executeKernel(kernels.findBlockBoundsKernel, context.getNumAtoms());
blockSorter->sort(*sortedBlocks);
kernels.sortBoxDataKernel.setArg<cl_int>(9, forceRebuildNeighborList);
context.executeKernel(kernels.sortBoxDataKernel, context.getNumAtoms());
setPeriodicBoxArgs(context, kernels.findInteractingBlocksKernel, 0);
context.executeKernel(kernels.findInteractingBlocksKernel, context.getNumAtoms(), interactingBlocksThreadBlockSize);
forceRebuildNeighborList = false;
if (context.getComputeForceCount() == 1)
rebuild = updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
} while (rebuild);
lastCutoff = kernels.cutoffDistance;
}
void OpenCLNonbondedUtilities::computeInteractions(int forceGroups) {
void OpenCLNonbondedUtilities::computeInteractions(int forceGroups, bool includeForces, bool includeEnergy) {
if ((forceGroups&groupFlags) == 0)
return;
KernelSet& kernels = groupKernels[forceGroups];
if (kernels.hasForces) {
cl::Kernel& kernel = (includeForces ? (includeEnergy ? kernels.forceEnergyKernel : kernels.forceKernel) : kernels.energyKernel);
if (*reinterpret_cast<cl_kernel*>(&kernel) == NULL)
kernel = createInteractionKernel(kernels.source, parameters, arguments, true, true, forceGroups, includeForces, includeEnergy);
if (useCutoff)
setPeriodicBoxArgs(context, kernels.forceKernel, 9);
context.executeKernel(kernels.forceKernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
if (context.getComputeForceCount() == 1)
updateNeighborListSize(); // This is the first time step, so check whether our initial guess was large enough.
setPeriodicBoxArgs(context, kernel, 9);
context.executeKernel(kernel, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
}
void OpenCLNonbondedUtilities::updateNeighborListSize() {
bool OpenCLNonbondedUtilities::updateNeighborListSize() {
if (!useCutoff)
return;
return false;
unsigned int* pinnedInteractionCount = (unsigned int*) context.getPinnedBuffer();
interactionCount->download(pinnedInteractionCount);
if (pinnedInteractionCount[0] <= (unsigned int) interactingTiles->getSize())
return;
return false;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
......@@ -387,14 +409,28 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
iter->second.forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
iter->second.forceKernel.setArg<cl_uint>(14, maxTiles);
iter->second.forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
iter->second.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
iter->second.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
iter->second.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
KernelSet& kernels = iter->second;
if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) {
kernels.forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
kernels.forceKernel.setArg<cl_uint>(14, maxTiles);
kernels.forceKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
}
if (*reinterpret_cast<cl_kernel*>(&kernels.energyKernel) != NULL) {
kernels.energyKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
kernels.energyKernel.setArg<cl_uint>(14, maxTiles);
kernels.energyKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
}
if (*reinterpret_cast<cl_kernel*>(&kernels.forceEnergyKernel) != NULL) {
kernels.forceEnergyKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
kernels.forceEnergyKernel.setArg<cl_uint>(14, maxTiles);
kernels.forceEnergyKernel.setArg<cl::Buffer>(17, interactingAtoms->getDeviceBuffer());
}
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(6, interactingTiles->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(7, interactingAtoms->getDeviceBuffer());
kernels.findInteractingBlocksKernel.setArg<cl_uint>(9, maxTiles);
}
forceRebuildNeighborList = true;
return true;
}
void OpenCLNonbondedUtilities::setUsePadding(bool padding) {
......@@ -410,12 +446,23 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
numTiles = (int) (endFraction*totalTiles)-startTileIndex;
if (useCutoff) {
// We are using a cutoff, and the kernels have already been created.
for (map<int, KernelSet>::iterator iter = groupKernels.begin(); iter != groupKernels.end(); ++iter) {
iter->second.forceKernel.setArg<cl_uint>(5, startTileIndex);
iter->second.forceKernel.setArg<cl_uint>(6, numTiles);
iter->second.findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
iter->second.findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
KernelSet& kernels = iter->second;
if (*reinterpret_cast<cl_kernel*>(&kernels.forceKernel) != NULL) {
kernels.forceKernel.setArg<cl_uint>(5, startTileIndex);
kernels.forceKernel.setArg<cl_uint>(6, numTiles);
}
if (*reinterpret_cast<cl_kernel*>(&kernels.energyKernel) != NULL) {
kernels.energyKernel.setArg<cl_uint>(5, startTileIndex);
kernels.energyKernel.setArg<cl_uint>(6, numTiles);
}
if (*reinterpret_cast<cl_kernel*>(&kernels.forceEnergyKernel) != NULL) {
kernels.forceEnergyKernel.setArg<cl_uint>(5, startTileIndex);
kernels.forceEnergyKernel.setArg<cl_uint>(6, numTiles);
}
kernels.findInteractingBlocksKernel.setArg<cl_uint>(10, startBlockIndex);
kernels.findInteractingBlocksKernel.setArg<cl_uint>(11, numBlocks);
}
forceRebuildNeighborList = true;
}
......@@ -433,8 +480,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels.hasForces = (source.size() > 0);
kernels.cutoffDistance = cutoff;
if (kernels.hasForces)
kernels.forceKernel = createInteractionKernel(source, parameters, arguments, true, true, groups);
kernels.source = source;
if (useCutoff) {
double padding = (usePadding ? 0.1*cutoff : 0.0);
double paddedCutoff = cutoff+padding;
......@@ -491,7 +537,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels.findInteractingBlocksKernel.setArg<cl::Buffer>(18, rebuildNeighborList->getDeviceBuffer());
if (kernels.findInteractingBlocksKernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(context.getDevice()) < groupSize) {
// The device can't handle this block size, so reduce it.
groupSize -= 32;
if (groupSize < 32)
throw OpenMMException("Failed to create findInteractingBlocks kernel");
......@@ -504,7 +550,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels[groups] = kernels;
}
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups) {
cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& source, const vector<ParameterInfo>& params, const vector<ParameterInfo>& arguments, bool useExclusions, bool isSymmetric, int groups, bool includeForces, bool includeEnergy) {
map<string, string> replacements;
replacements["COMPUTE_INTERACTION"] = source;
const string suffixes[] = {"x", "y", "z", "w"};
......@@ -603,6 +649,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["USE_SYMMETRIC"] = "1";
if (useCutoff && context.getSIMDWidth() < 32)
defines["PRUNE_BY_CUTOFF"] = "1";
if (includeForces)
defines["INCLUDE_FORCES"] = "1";
if (includeEnergy)
defines["INCLUDE_ENERGY"] = "1";
defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize);
double maxCutoff = 0.0;
for (int i = 0; i < 32; i++) {
......
......@@ -579,6 +579,10 @@ void OpenCLParallelCalcNonbondedForceKernel::copyParametersToContext(ContextImpl
getKernel(i).copyParametersToContext(context, force);
}
void OpenCLParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const OpenCLCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz);
}
class OpenCLParallelCalcCustomNonbondedForceKernel::Task : public OpenCLContext::WorkTask {
public:
Task(ContextImpl& context, OpenCLCalcCustomNonbondedForceKernel& kernel, bool includeForce,
......
......@@ -74,6 +74,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcCustomGBForceKernel::Name(), factory);
registerKernelFactory(CalcCustomExternalForceKernel::Name(), factory);
registerKernelFactory(CalcCustomHbondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
......
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
/**
* Compute the center of each group.
*/
__kernel void computeGroupCenters(__global const real4* restrict posq, __global const int* restrict groupParticles,
__global const real* restrict groupWeights, __global const int* restrict groupOffsets, __global real4* restrict centerPositions) {
__local volatile real3 temp[64];
for (int group = get_group_id(0); group < NUM_GROUPS; group += get_num_groups(0)) {
// The threads in this block work together to compute the center one group.
int firstIndex = groupOffsets[group];
int lastIndex = groupOffsets[group+1];
real3 center = (real3) 0;
for (int index = get_local_id(0); index < lastIndex-firstIndex; index += get_local_size(0)) {
int atom = groupParticles[firstIndex+index];
real weight = groupWeights[firstIndex+index];
real4 pos = posq[atom];
center.x += weight*pos.x;
center.y += weight*pos.y;
center.z += weight*pos.z;
}
// Sum the values.
int thread = get_local_id(0);
temp[thread].x = center.x;
temp[thread].y = center.y;
temp[thread].z = center.z;
barrier(CLK_LOCAL_MEM_FENCE);
if (thread < 32) {
temp[thread].x += temp[thread+32].x;
temp[thread].y += temp[thread+32].y;
temp[thread].z += temp[thread+32].z;
}
SYNC_WARPS;
if (thread < 16) {
temp[thread].x += temp[thread+16].x;
temp[thread].y += temp[thread+16].y;
temp[thread].z += temp[thread+16].z;
}
SYNC_WARPS;
if (thread < 8) {
temp[thread].x += temp[thread+8].x;
temp[thread].y += temp[thread+8].y;
temp[thread].z += temp[thread+8].z;
}
SYNC_WARPS;
if (thread < 4) {
temp[thread].x += temp[thread+4].x;
temp[thread].y += temp[thread+4].y;
temp[thread].z += temp[thread+4].z;
}
SYNC_WARPS;
if (thread < 2) {
temp[thread].x += temp[thread+2].x;
temp[thread].y += temp[thread+2].y;
temp[thread].z += temp[thread+2].z;
}
SYNC_WARPS;
if (thread == 0)
centerPositions[group] = (real4) (temp[0].x+temp[1].x, temp[0].y+temp[1].y, temp[0].z+temp[1].z, 0);
}
}
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
real4 delta(real4 vec1, real4 vec2) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
/**
* Compute the angle between two vectors. The w component of each vector should contain the squared magnitude.
*/
real computeAngle(real4 vec1, real4 vec2) {
real dotProduct = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
real cosine = dotProduct*RSQRT(vec1.w*vec2.w);
real angle;
if (cosine > 0.99f || cosine < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real4 crossProduct = cross(vec1, vec2);
real scale = vec1.w*vec2.w;
angle = asin(SQRT(dot(crossProduct, crossProduct)/scale));
if (cosine < 0)
angle = M_PI-angle;
}
else
angle = acos(cosine);
return angle;
}
/**
* Compute the cross product of two vectors, setting the fourth component to the squared magnitude.
*/
real4 computeCross(real4 vec1, real4 vec2) {
real4 result = cross(vec1, vec2);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
/**
* Compute the forces on groups based on the bonds.
*/
__kernel void computeGroupForces(__global long* restrict groupForce, __global mixed* restrict energyBuffer, __global const real4* restrict centerPositions,
__global const int* restrict bondGroups
EXTRA_ARGS) {
mixed energy = 0;
for (int index = get_global_id(0); index < NUM_BONDS; index += get_global_size(0)) {
COMPUTE_FORCE
}
energyBuffer[get_global_id(0)] += energy;
}
/**
* Apply the forces from the group centers to the individual atoms.
*/
__kernel void applyForcesToAtoms(__global const int* restrict groupParticles, __global const real* restrict groupWeights, __global const int* restrict groupOffsets,
__global const long* restrict groupForce, __global long* restrict atomForce) {
for (int group = get_group_id(0); group < NUM_GROUPS; group += get_num_groups(0)) {
long fx = groupForce[group];
long fy = groupForce[group+NUM_GROUPS];
long fz = groupForce[group+NUM_GROUPS*2];
int firstIndex = groupOffsets[group];
int lastIndex = groupOffsets[group+1];
for (int index = get_local_id(0); index < lastIndex-firstIndex; index += get_local_size(0)) {
int atom = groupParticles[firstIndex+index];
real weight = groupWeights[firstIndex+index];
atom_add(&atomForce[atom], (long) (fx*weight));
atom_add(&atomForce[atom+PADDED_NUM_ATOMS], (long) (fy*weight));
atom_add(&atomForce[atom+2*PADDED_NUM_ATOMS], (long) (fz*weight));
}
}
}
......@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
#else
__global real4* restrict forceBuffers,
#endif
__global real* restrict energyBuffer, __local real4* restrict local_force,
__global mixed* restrict energyBuffer, __local real4* restrict local_force,
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF
......@@ -31,7 +31,7 @@ __kernel void computeN2Energy(
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tbx = get_local_id(0) - tgx;
real energy = 0;
mixed energy = 0;
// First loop: process tiles that contain exclusions.
......
......@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
#else
__global real4* restrict forceBuffers,
#endif
__global real* restrict energyBuffer, __local real4* restrict local_force,
__global mixed* restrict energyBuffer, __local real4* restrict local_force,
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF
......@@ -27,7 +27,7 @@ __kernel void computeN2Energy(
unsigned int numTiles
#endif
PARAMETER_ARGUMENTS) {
real energy = 0;
mixed energy = 0;
// First loop: process tiles that contain exclusions.
......
......@@ -9,9 +9,9 @@
* Reduce the derivatives computed in the N^2 energy kernel, and compute all per-particle energy terms.
*/
__kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq
__kernel void computePerParticleEnergy(int bufferSize, int numBuffers, __global real4* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq
PARAMETER_ARGUMENTS) {
real energy = 0;
mixed energy = 0;
unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) {
// Reduce the derivatives
......
......@@ -53,11 +53,11 @@ real4 computeCross(real4 vec1, real4 vec2) {
/**
* Compute forces on donors.
*/
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
real energy = 0;
mixed energy = 0;
real4 f1 = (real4) 0;
real4 f2 = (real4) 0;
real4 f3 = (real4) 0;
......@@ -142,7 +142,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
/**
* Compute forces on acceptors.
*/
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
......
......@@ -72,7 +72,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
* Compute the interaction.
*/
__kernel void computeInteraction(
__global long* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq,
__global long* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#ifdef USE_CUTOFF
, __global const int* restrict neighbors, __global const int* restrict neighborStartIndex
......@@ -84,7 +84,7 @@ __kernel void computeInteraction(
, __global int* restrict exclusions, __global int* restrict exclusionStartIndex
#endif
PARAMETER_ARGUMENTS) {
real energy = 0.0f;
mixed energy = 0;
// Loop over particles to be the first one in the set.
......
......@@ -42,14 +42,14 @@ __kernel void computeInteractionGroups(
#else
__global real4* restrict forceBuffers,
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict groupData,
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict groupData,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex
real energy = 0.0f;
mixed energy = 0;
__local AtomData localData[LOCAL_MEMORY_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
......
......@@ -6,13 +6,13 @@ real2 multofReal2(real2 a, real2 b) {
* Precompute the cosine and sine sums which appear in each force term.
*/
__kernel void calculateEwaldCosSinSums(__global real* restrict energyBuffer, __global const real4* restrict posq, __global real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
__kernel void calculateEwaldCosSinSums(__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
const unsigned int ksizex = 2*KMAX_X-1;
const unsigned int ksizey = 2*KMAX_Y-1;
const unsigned int ksizez = 2*KMAX_Z-1;
const unsigned int totalK = ksizex*ksizey*ksizez;
unsigned int index = get_global_id(0);
real energy = 0.0f;
mixed energy = 0;
while (index < (KMAX_Y-1)*ksizez+KMAX_Z)
index += get_global_size(0);
while (index < totalK) {
......
......@@ -387,7 +387,7 @@ __kernel void computeGBSAForce1(
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
......@@ -400,7 +400,7 @@ __kernel void computeGBSAForce1(
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tbx = get_local_id(0) - tgx;
real energy = 0.0f;
mixed energy = 0;
__local AtomData2 localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions.
......
......@@ -50,8 +50,8 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
#ifdef SUPPORTS_64_BIT_ATOMICS
__global const long* restrict bornForceIn,
#endif
__global real* restrict energyBuffer, __global const float2* restrict params, __global const real* restrict bornRadii, __global const real* restrict obcChain) {
real energy = 0.0f;
__global mixed* restrict energyBuffer, __global const float2* restrict params, __global const real* restrict bornRadii, __global const real* restrict obcChain) {
mixed energy = 0;
unsigned int index = get_global_id(0);
while (index < NUM_ATOMS) {
// Sum the Born force
......
......@@ -409,7 +409,7 @@ __kernel void computeGBSAForce1(
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
......@@ -418,7 +418,7 @@ __kernel void computeGBSAForce1(
unsigned int numTiles,
#endif
__global const ushort2* exclusionTiles) {
real energy = 0.0f;
mixed energy = 0;
__local AtomData2 localData[TILE_SIZE];
// First loop: process tiles that contain exclusions.
......
......@@ -22,10 +22,10 @@ __kernel void computeNonbonded(
#else
__global real4* restrict forceBuffers,
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif
......@@ -34,11 +34,11 @@ __kernel void computeNonbonded(
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tbx = get_local_id(0) - tgx;
real energy = 0;
mixed energy = 0;
__local AtomData localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions.
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+warp*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(warp+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/totalWarps;
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
......@@ -87,11 +87,13 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.xyz -= delta.xyz*dEdR;
#else
force.xyz -= dEdR1.xyz;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
......@@ -100,7 +102,7 @@ __kernel void computeNonbonded(
}
else {
// This is an off-diagonal tile.
const unsigned int localAtomIndex = get_local_id(0);
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
......@@ -126,7 +128,7 @@ __kernel void computeNonbonded(
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -144,6 +146,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
......@@ -156,6 +159,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
......@@ -169,6 +173,7 @@ __kernel void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
......@@ -186,6 +191,7 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz += force.xyz;
if (x != y)
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
#endif
#endif
}
......@@ -213,7 +219,7 @@ __kernel void computeNonbonded(
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
......@@ -245,7 +251,7 @@ __kernel void computeNonbonded(
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
......@@ -300,7 +306,7 @@ __kernel void computeNonbonded(
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -318,6 +324,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
......@@ -330,6 +337,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
......@@ -352,7 +360,7 @@ __kernel void computeNonbonded(
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -370,6 +378,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
......@@ -382,6 +391,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
......@@ -392,6 +402,7 @@ __kernel void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
#ifdef USE_CUTOFF
unsigned int atom2 = atomIndices[get_local_id(0)];
#else
......@@ -412,9 +423,12 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz += force.xyz;
if (atom2 < PADDED_NUM_ATOMS)
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
#endif
#endif
}
pos++;
}
#ifdef INCLUDE_ENERGY
energyBuffer[get_global_id(0)] += energy;
#endif
}
......@@ -19,19 +19,19 @@ __kernel void computeNonbonded(
#else
__global real4* restrict forceBuffers,
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif
PARAMETER_ARGUMENTS) {
real energy = 0;
mixed energy = 0;
__local AtomData localData[TILE_SIZE];
// First loop: process tiles that contain exclusions.
const unsigned int firstExclusionTile = FIRST_EXCLUSION_TILE+get_group_id(0)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
const unsigned int lastExclusionTile = FIRST_EXCLUSION_TILE+(get_group_id(0)+1)*(LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE)/get_num_groups(0);
for (int pos = firstExclusionTile; pos < lastExclusionTile; pos++) {
......@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......@@ -228,18 +228,18 @@ __kernel void computeNonbonded(
while (pos < end) {
const bool hasExclusions = false;
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
}
else
#endif
......@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
real invR = RSQRT(r2);
real r = r2*invR;
unsigned int atom2 = j;
......@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < MAX_CUTOFF*MAX_CUTOFF) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
......
......@@ -325,14 +325,14 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
}
}
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global real* restrict energyBuffer,
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixed* restrict energyBuffer,
__global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real energy = 0;
mixed energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
......
......@@ -15,6 +15,6 @@ ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${AP
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME} ${OPENCL_LIBRARIES} ${PTHREADS_LIB_STATIC})
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment