Unverified Commit 98d81730 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Converted more code to common platform (#3073)

* Converted more code to common platform

* Converted more code to common platform
parent 72c70cfe
......@@ -413,6 +413,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["ACOS"] = "acos";
compilationDefines["ASIN"] = "asin";
compilationDefines["ATAN"] = "atan";
compilationDefines["ERF"] = "erf";
compilationDefines["ERFC"] = "erfc";
// Set defines for applying periodic boundary conditions.
......
......@@ -72,9 +72,9 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if (name == UpdateStateDataKernel::Name())
return new OpenCLUpdateStateDataKernel(name, platform, cl);
if (name == ApplyConstraintsKernel::Name())
return new OpenCLApplyConstraintsKernel(name, platform, cl);
return new CommonApplyConstraintsKernel(name, platform, cl);
if (name == VirtualSitesKernel::Name())
return new OpenCLVirtualSitesKernel(name, platform, cl);
return new CommonVirtualSitesKernel(name, platform, cl);
if (name == CalcHarmonicBondForceKernel::Name())
return new CommonCalcHarmonicBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomBondForceKernel::Name())
......@@ -134,7 +134,7 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if (name == IntegrateNoseHooverStepKernel::Name())
return new CommonIntegrateNoseHooverStepKernel(name, platform, cl);
if (name == ApplyMonteCarloBarostatKernel::Name())
return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl);
return new CommonApplyMonteCarloBarostatKernel(name, platform, cl);
if (name == RemoveCMMotionKernel::Name())
return new CommonRemoveCMMotionKernel(name, platform, cl);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
......
......@@ -27,23 +27,14 @@
#include "OpenCLKernels.h"
#include "OpenCLForceInfo.h"
#include "openmm/Context.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/OSRngSeed.h"
#include "CommonKernelSources.h"
#include "OpenCLBondedUtilities.h"
#include "OpenCLExpressionUtilities.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "OpenCLKernelSources.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include "lepton/ParsedExpression.h"
#include "ReferenceTabulatedFunction.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include <algorithm>
......@@ -54,14 +45,6 @@
using namespace OpenMM;
using namespace std;
using namespace Lepton;
static void setPosqCorrectionArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseMixedPrecision())
kernel.setArg<cl::Buffer>(index, cl.getPosqCorrection().getDeviceBuffer());
else
kernel.setArg(index, sizeof(void*), NULL);
}
static void setPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision())
......@@ -87,40 +70,6 @@ static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index)
}
}
static bool isZeroExpression(const Lepton::ParsedExpression& expression) {
const Lepton::Operation& op = expression.getRootNode().getOperation();
if (op.getId() != Lepton::Operation::CONSTANT)
return false;
return (dynamic_cast<const Lepton::Operation::Constant&>(op).getValue() == 0.0);
}
static bool usesVariable(const Lepton::ExpressionTreeNode& node, const string& variable) {
const Lepton::Operation& op = node.getOperation();
if (op.getId() == Lepton::Operation::VARIABLE && op.getName() == variable)
return true;
for (auto& child : node.getChildren())
if (usesVariable(child, variable))
return true;
return false;
}
static bool usesVariable(const Lepton::ParsedExpression& expression, const string& variable) {
return usesVariable(expression.getRootNode(), variable);
}
static pair<ExpressionTreeNode, string> makeVariable(const string& name, const string& value) {
return make_pair(ExpressionTreeNode(new Operation::Variable(name)), value);
}
static void replaceFunctionsInExpression(map<string, CustomFunction*>& functions, ExpressionProgram& expression) {
for (int i = 0; i < expression.getNumOperations(); i++) {
if (expression.getOperation(i).getId() == Operation::CUSTOM) {
const Operation::Custom& op = dynamic_cast<const Operation::Custom&>(expression.getOperation(i));
expression.setOperation(i, new Operation::Custom(op.getName(), functions[op.getName()]->clone(), op.getDerivOrder()));
}
}
}
void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
......@@ -479,38 +428,6 @@ void OpenCLUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream&
listener->execute();
}
void OpenCLApplyConstraintsKernel::initialize(const System& system) {
}
void OpenCLApplyConstraintsKernel::apply(ContextImpl& context, double tol) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
map<string, string> defines;
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
cl::Program program = cl.createProgram(OpenCLKernelSources::constraints, defines);
applyDeltasKernel = cl::Kernel(program, "applyPositionDeltas");
applyDeltasKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
setPosqCorrectionArg(cl, applyDeltasKernel, 1);
applyDeltasKernel.setArg<cl::Buffer>(2, cl.getIntegrationUtilities().getPosDelta().getDeviceBuffer());
}
OpenCLIntegrationUtilities& integration = cl.getIntegrationUtilities();
cl.clearBuffer(integration.getPosDelta());
integration.applyConstraints(tol);
cl.executeKernel(applyDeltasKernel, cl.getNumAtoms());
integration.computeVirtualSites();
}
void OpenCLApplyConstraintsKernel::applyToVelocities(ContextImpl& context, double tol) {
cl.getIntegrationUtilities().applyVelocityConstraints(tol);
}
void OpenCLVirtualSitesKernel::initialize(const System& system) {
}
void OpenCLVirtualSitesKernel::computePositions(ContextImpl& context) {
cl.getIntegrationUtilities().computeVirtualSites();
}
class OpenCLCalcNonbondedForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(int requiredBuffers, const NonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
......@@ -767,11 +684,14 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
map<string, string> replacements;
replacements["NUM_ATOMS"] = cl.intToString(numParticles);
replacements["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
replacements["KMAX_X"] = cl.intToString(kmaxx);
replacements["KMAX_Y"] = cl.intToString(kmaxy);
replacements["KMAX_Z"] = cl.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cl.doubleToString(-1.0/(4.0*alpha*alpha));
cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
replacements["ONE_4PI_EPS0"] = cl.doubleToString(ONE_4PI_EPS0);
replacements["M_PI"] = cl.doubleToString(M_PI);
cl::Program program = cl.createProgram(CommonKernelSources::ewald, replacements);
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
......@@ -1005,13 +925,13 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
replacements["USE_PERIODIC"] = force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0";
if (doLJPME)
replacements["EWALD_DISPERSION_ALPHA"] = cl.doubleToString(dispersionAlpha);
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::pmeExclusions, replacements), force.getForceGroup());
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(CommonKernelSources::pmeExclusions, replacements), force.getForceGroup());
}
}
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
string source = cl.replaceStrings(CommonKernelSources::coulombLennardJones, defines);
charges.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
baseParticleParams.initialize<mm_float4>(cl, cl.getPaddedNumAtoms(), "baseParticleParams");
baseParticleParams.upload(baseParticleParamVec);
......@@ -1059,7 +979,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
map<string, string> replacements;
replacements["APPLY_PERIODIC"] = (usePeriodic && force.getExceptionsUsePeriodicBoundaryConditions() ? "1" : "0");
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams.getDeviceBuffer(), "float4");
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(CommonKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
// Initialize parameter offsets.
......@@ -1131,7 +1051,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// Initialize the kernel for updating parameters.
cl::Program program = cl.createProgram(OpenCLKernelSources::nonbondedParameters, paramsDefines);
cl::Program program = cl.createProgram(CommonKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cl::Kernel(program, "computeParameters");
computeExclusionParamsKernel = cl::Kernel(program, "computeExclusionParameters");
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
......@@ -1172,7 +1092,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldSumsKernel.setArg<cl::Buffer>(2, cosSinSums.getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(0, cl.getForceBuffers().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(0, cl.getLongForceBuffer().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
ewaldForcesKernel.setArg<cl::Buffer>(2, cosSinSums.getDeviceBuffer());
}
......@@ -1332,19 +1252,13 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (cosSinSums.isInitialized() && includeReciprocal) {
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 recipBoxSize = mm_double4(2*M_PI/boxSize.x, 2*M_PI/boxSize.y, 2*M_PI/boxSize.z, 0.0);
double recipCoefficient = ONE_4PI_EPS0*4*M_PI/(boxSize.x*boxSize.y*boxSize.z);
if (cl.getUseDoublePrecision()) {
ewaldSumsKernel.setArg<mm_double4>(3, recipBoxSize);
ewaldSumsKernel.setArg<cl_double>(4, recipCoefficient);
ewaldForcesKernel.setArg<mm_double4>(3, recipBoxSize);
ewaldForcesKernel.setArg<cl_double>(4, recipCoefficient);
ewaldSumsKernel.setArg<mm_double4>(3, boxSize);
ewaldForcesKernel.setArg<mm_double4>(3, boxSize);
}
else {
ewaldSumsKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldSumsKernel.setArg<cl_float>(4, (cl_float) recipCoefficient);
ewaldForcesKernel.setArg<mm_float4>(3, mm_float4((float) recipBoxSize.x, (float) recipBoxSize.y, (float) recipBoxSize.z, 0));
ewaldForcesKernel.setArg<cl_float>(4, (cl_float) recipCoefficient);
ewaldSumsKernel.setArg<mm_float4>(3, mm_float4((float) boxSize.x, (float) boxSize.y, (float) boxSize.z, 0));
ewaldForcesKernel.setArg<mm_float4>(3, mm_float4((float) boxSize.x, (float) boxSize.y, (float) boxSize.z, 0));
}
cl.executeKernel(ewaldSumsKernel, cosSinSums.getSize());
cl.executeKernel(ewaldForcesKernel, cl.getNumAtoms());
......@@ -1671,283 +1585,3 @@ void OpenCLCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx,
nz = dispersionGridSizeZ;
}
}
class OpenCLCalcCustomCVForceKernel::ForceInfo : public OpenCLForceInfo {
public:
ForceInfo(ComputeForceInfo& force) : OpenCLForceInfo(0), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
return force.areParticlesIdentical(particle1, particle2);
}
int getNumParticleGroups() {
return force.getNumParticleGroups();
}
void getParticlesInGroup(int index, std::vector<int>& particles) {
force.getParticlesInGroup(index, particles);
}
bool areGroupsIdentical(int group1, int group2) {
return force.areGroupsIdentical(group1, group2);
}
private:
ComputeForceInfo& force;
};
class OpenCLCalcCustomCVForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
ReorderListener(OpenCLContext& cl, OpenCLArray& invAtomOrder) : cl(cl), invAtomOrder(invAtomOrder) {
}
void execute() {
vector<cl_int> invOrder(cl.getPaddedNumAtoms());
const vector<int>& order = cl.getAtomIndex();
for (int i = 0; i < order.size(); i++)
invOrder[order[i]] = i;
invAtomOrder.upload(invOrder);
}
private:
OpenCLContext& cl;
OpenCLArray& invAtomOrder;
};
void OpenCLCalcCustomCVForceKernel::initialize(const System& system, const CustomCVForce& force, ContextImpl& innerContext) {
int numCVs = force.getNumCollectiveVariables();
cl.addForce(new OpenCLForceInfo(1));
for (int i = 0; i < force.getNumGlobalParameters(); i++)
globalParameterNames.push_back(force.getGlobalParameterName(i));
for (int i = 0; i < numCVs; i++)
variableNames.push_back(force.getCollectiveVariableName(i));
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string name = force.getEnergyParameterDerivativeName(i);
paramDerivNames.push_back(name);
cl.addEnergyParameterDerivative(name);
}
// Create custom functions for the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Create the expressions.
Lepton::ParsedExpression energyExpr = Lepton::Parser::parse(force.getEnergyFunction(), functions);
energyExpression = energyExpr.createProgram();
variableDerivExpressions.clear();
for (auto& name : variableNames)
variableDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
paramDerivExpressions.clear();
for (auto& name : paramDerivNames)
paramDerivExpressions.push_back(energyExpr.differentiate(name).optimize().createProgram());
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
// Copy parameter derivatives from the inner context.
OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
for (auto& param : cl2.getEnergyParamDerivNames())
cl.addEnergyParameterDerivative(param);
// Create arrays for storing information.
int elementSize = (cl.getUseDoublePrecision() || cl.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
cvForces.resize(numCVs);
for (int i = 0; i < numCVs; i++)
cvForces[i].initialize(cl, cl.getNumAtoms(), 4*elementSize, "cvForce");
invAtomOrder.initialize<cl_int>(cl, cl.getPaddedNumAtoms(), "invAtomOrder");
innerInvAtomOrder.initialize<cl_int>(cl, cl.getPaddedNumAtoms(), "innerInvAtomOrder");
// Create the kernels.
stringstream args, add;
for (int i = 0; i < numCVs; i++) {
args << ", __global real4* restrict force" << i << ", real dEdV" << i;
add << "f += force" << i << "[i]*dEdV" << i << ";\n";
}
map<string, string> replacements;
replacements["PARAMETER_ARGUMENTS"] = args.str();
replacements["ADD_FORCES"] = add.str();
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customCVForce, replacements));
copyStateKernel = cl::Kernel(program, "copyState");
copyForcesKernel = cl::Kernel(program, "copyForces");
addForcesKernel = cl::Kernel(program, "addForces");
// This context needs to respect all forces in the inner context when reordering atoms.
for (auto* info : cl2.getForceInfos())
cl.addForce(new ForceInfo(*info));
}
double OpenCLCalcCustomCVForceKernel::execute(ContextImpl& context, ContextImpl& innerContext, bool includeForces, bool includeEnergy) {
copyState(context, innerContext);
int numCVs = variableNames.size();
int numAtoms = cl.getNumAtoms();
OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
vector<double> cvValues;
vector<map<string, double> > cvDerivs(numCVs);
for (int i = 0; i < numCVs; i++) {
cvValues.push_back(innerContext.calcForcesAndEnergy(true, true, 1<<i));
copyForcesKernel.setArg<cl::Buffer>(0, cvForces[i].getDeviceBuffer());
cl.executeKernel(copyForcesKernel, numAtoms);
innerContext.getEnergyParameterDerivatives(cvDerivs[i]);
}
// Compute the energy and forces.
map<string, double> variables;
for (auto& name : globalParameterNames)
variables[name] = context.getParameter(name);
for (int i = 0; i < numCVs; i++)
variables[variableNames[i]] = cvValues[i];
double energy = energyExpression.evaluate(variables);
for (int i = 0; i < numCVs; i++) {
double dEdV = variableDerivExpressions[i].evaluate(variables);
if (cl.getUseDoublePrecision())
addForcesKernel.setArg<cl_double>(2*i+3, dEdV);
else
addForcesKernel.setArg<cl_float>(2*i+3, dEdV);
}
cl.executeKernel(addForcesKernel, numAtoms);
// Compute the energy parameter derivatives.
map<string, double>& energyParamDerivs = cl.getEnergyParamDerivWorkspace();
for (int i = 0; i < paramDerivExpressions.size(); i++)
energyParamDerivs[paramDerivNames[i]] += paramDerivExpressions[i].evaluate(variables);
for (int i = 0; i < numCVs; i++) {
double dEdV = variableDerivExpressions[i].evaluate(variables);
for (auto& deriv : cvDerivs[i])
energyParamDerivs[deriv.first] += dEdV*deriv.second;
}
return energy;
}
void OpenCLCalcCustomCVForceKernel::copyState(ContextImpl& context, ContextImpl& innerContext) {
int numAtoms = cl.getNumAtoms();
OpenCLContext& cl2 = *reinterpret_cast<OpenCLPlatform::PlatformData*>(innerContext.getPlatformData())->contexts[0];
if (!hasInitializedKernels) {
hasInitializedKernels = true;
// Initialize the listeners.
ReorderListener* listener1 = new ReorderListener(cl, invAtomOrder);
ReorderListener* listener2 = new ReorderListener(cl2, innerInvAtomOrder);
cl.addReorderListener(listener1);
cl2.addReorderListener(listener2);
listener1->execute();
listener2->execute();
// Initialize the kernels.
copyStateKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(2, cl.getVelm().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(3, cl.getAtomIndexArray().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(4, cl2.getPosq().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(6, cl2.getVelm().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(7, innerInvAtomOrder.getDeviceBuffer());
copyStateKernel.setArg<cl_int>(8, numAtoms);
if (cl.getUseMixedPrecision()) {
copyStateKernel.setArg<cl::Buffer>(1, cl.getPosqCorrection().getDeviceBuffer());
copyStateKernel.setArg<cl::Buffer>(5, cl2.getPosqCorrection().getDeviceBuffer());
}
else {
copyStateKernel.setArg(1, sizeof(void*), NULL);
copyStateKernel.setArg(5, sizeof(void*), NULL);
}
copyForcesKernel.setArg<cl::Buffer>(1, invAtomOrder.getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(2, cl2.getForce().getDeviceBuffer());
copyForcesKernel.setArg<cl::Buffer>(3, cl2.getAtomIndexArray().getDeviceBuffer());
copyForcesKernel.setArg<cl_int>(4, numAtoms);
addForcesKernel.setArg<cl::Buffer>(0, cl.getForce().getDeviceBuffer());
addForcesKernel.setArg<cl_int>(1, numAtoms);
for (int i = 0; i < cvForces.size(); i++)
addForcesKernel.setArg<cl::Buffer>(2*i+2, cvForces[i].getDeviceBuffer());
}
cl.executeKernel(copyStateKernel, numAtoms);
Vec3 a, b, c;
context.getPeriodicBoxVectors(a, b, c);
innerContext.setPeriodicBoxVectors(a, b, c);
innerContext.setTime(context.getTime());
map<string, double> innerParameters = innerContext.getParameters();
for (auto& param : innerParameters)
innerContext.setParameter(param.first, context.getParameter(param.first));
}
void OpenCLCalcCustomCVForceKernel::copyParametersToContext(ContextImpl& context, const CustomCVForce& force) {
// Create custom functions for the tabulated functions.
map<string, CustomFunction*> functions;
for (int i = 0; i < (int) force.getNumTabulatedFunctions(); i++)
functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
// Replace tabulated functions in the expressions.
replaceFunctionsInExpression(functions, energyExpression);
for (auto& expression : variableDerivExpressions)
replaceFunctionsInExpression(functions, expression);
for (auto& expression : paramDerivExpressions)
replaceFunctionsInExpression(functions, expression);
// Delete the custom functions.
for (auto& function : functions)
delete function.second;
}
void OpenCLApplyMonteCarloBarostatKernel::initialize(const System& system, const Force& thermostat) {
savedPositions.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedPositions");
savedForces.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4), "savedForces");
cl::Program program = cl.createProgram(OpenCLKernelSources::monteCarloBarostat);
kernel = cl::Kernel(program, "scalePositions");
}
void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, double scaleX, double scaleY, double scaleZ) {
if (!hasInitializedKernels) {
hasInitializedKernels = true;
// Create the arrays with the molecule definitions.
vector<vector<int> > molecules = context.getMolecules();
numMolecules = molecules.size();
moleculeAtoms.initialize<int>(cl, cl.getNumAtoms(), "moleculeAtoms");
moleculeStartIndex.initialize<int>(cl, numMolecules+1, "moleculeStartIndex");
vector<int> atoms(moleculeAtoms.getSize());
vector<int> startIndex(moleculeStartIndex.getSize());
int index = 0;
for (int i = 0; i < numMolecules; i++) {
startIndex[i] = index;
for (int molecule : molecules[i])
atoms[index++] = molecule;
}
startIndex[numMolecules] = index;
moleculeAtoms.upload(atoms);
moleculeStartIndex.upload(startIndex);
// Initialize the kernel arguments.
kernel.setArg<cl_int>(3, numMolecules);
kernel.setArg<cl::Buffer>(9, cl.getPosq().getDeviceBuffer());
kernel.setArg<cl::Buffer>(10, moleculeAtoms.getDeviceBuffer());
kernel.setArg<cl::Buffer>(11, moleculeStartIndex.getDeviceBuffer());
}
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(cl.getPosq().getDeviceBuffer(), savedPositions.getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(cl.getForce().getDeviceBuffer(), savedForces.getDeviceBuffer(), 0, 0, bytesToCopy);
kernel.setArg<cl_float>(0, (cl_float) scaleX);
kernel.setArg<cl_float>(1, (cl_float) scaleY);
kernel.setArg<cl_float>(2, (cl_float) scaleZ);
setPeriodicBoxArgs(cl, kernel, 4);
cl.executeKernel(kernel, cl.getNumAtoms());
for (auto& offset : cl.getPosCellOffsets())
offset = mm_int4(0, 0, 0, 0);
lastAtomOrder = cl.getAtomIndex();
}
void OpenCLApplyMonteCarloBarostatKernel::restoreCoordinates(ContextImpl& context) {
int bytesToCopy = cl.getPosq().getSize()*(cl.getUseDoublePrecision() ? sizeof(mm_double4) : sizeof(mm_float4));
cl.getQueue().enqueueCopyBuffer(savedPositions.getDeviceBuffer(), cl.getPosq().getDeviceBuffer(), 0, 0, bytesToCopy);
cl.getQueue().enqueueCopyBuffer(savedForces.getDeviceBuffer(), cl.getForce().getDeviceBuffer(), 0, 0, bytesToCopy);
}
real4 v0 = pos2-pos1;
real4 v1 = pos2-pos3;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
#endif
real4 cp = cross(v0, v1);
real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(SQRT(rp), (real) 1.0e-06f);
real r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
real r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
real dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
real cosine = clamp(dot*RSQRT(r21*r23), (real) -1, (real) 1);
real theta = acos(cosine);
COMPUTE_FORCE
real4 force1 = cross(v0, cp)*(dEdAngle/(r21*rp));
real4 force3 = cross(cp, v1)*(dEdAngle/(r23*rp));
real4 force2 = -force1-force3;
real4 delta = pos2-pos1;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE
dEdR = (r > 0.0f) ? (dEdR / r) : 0.0f;
delta.xyz *= dEdR;
real4 force1 = delta;
real4 force2 = -delta;
\ No newline at end of file
__kernel void applyPositionDeltas(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta) {
for (unsigned int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
mixed4 pos = posq[index];
#endif
pos.xyz += posDelta[index].xyz;
#ifdef USE_MIXED_PRECISION
posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
}
}
{
#ifdef USE_DOUBLE_PRECISION
unsigned long includeInteraction;
#else
unsigned int includeInteraction;
#endif
#if USE_EWALD
includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
#if HAS_COULOMB
const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2*invR;
#else
const real prefactor = 0.0f;
#endif
#ifdef USE_DOUBLE_PRECISION
const real erfcAlphaR = erfc(alphaR);
#else
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7.
const real t = RECIP(1.0f+0.3275911f*alphaR);
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif
real tempForce = 0;
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
#endif
#if DO_LJPME
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space.
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
#else
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction);
#endif
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
#else
#ifdef USE_CUTOFF
includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
#else
includeInteraction = (!isExcluded);
#endif
real tempForce = 0;
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(SIGMA_EPSILON1.y*SIGMA_EPSILON2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6-1);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
#endif
ljEnergy = select((real) 0, ljEnergy, includeInteraction);
tempEnergy += ljEnergy;
#endif
#if HAS_COULOMB
#ifdef USE_CUTOFF
const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2;
tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2);
tempEnergy += select((real) 0, prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C), includeInteraction);
#else
const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2*invR;
tempForce += prefactor;
tempEnergy += select((real) 0, prefactor, includeInteraction);
#endif
#endif
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
#endif
}
/**
* Copy the positions and velocities to the inner context.
*/
__kernel void copyState(__global real4* posq, __global real4* posqCorrection, __global mixed4* velm, __global int* restrict atomOrder,
__global real4* innerPosq, __global real4* innerPosqCorrection, __global mixed4* innerVelm, __global int* restrict innerInvAtomOrder,
int numAtoms) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
int index = innerInvAtomOrder[atomOrder[i]];
innerPosq[index] = posq[i];
innerVelm[index] = velm[i];
#ifdef USE_MIXED_PRECISION
innerPosqCorrection[index] = posqCorrection[i];
#endif
}
}
/**
* Copy the forces back to the main context.
*/
__kernel void copyForces(__global real4* forces, __global int* restrict invAtomOrder, __global real4* innerForces,
__global int* restrict innerAtomOrder, int numAtoms) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
int index = invAtomOrder[innerAtomOrder[i]];
forces[index] = innerForces[i];
}
}
/**
* Add all the forces from the CVs.
*/
__kernel void addForces(__global real4* forces, int numAtoms
PARAMETER_ARGUMENTS) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
real4 f = forces[i];
ADD_FORCES
forces[i] = f;
}
}
\ No newline at end of file
real2 multofReal2(real2 a, real2 b) {
return (real2) (a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x);
}
/**
* Precompute the cosine and sine sums which appear in each force term.
*/
__kernel void calculateEwaldCosSinSums(__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
const unsigned int ksizex = 2*KMAX_X-1;
const unsigned int ksizey = 2*KMAX_Y-1;
const unsigned int ksizez = 2*KMAX_Z-1;
const unsigned int totalK = ksizex*ksizey*ksizez;
unsigned int index = get_global_id(0);
mixed energy = 0;
while (index < (KMAX_Y-1)*ksizez+KMAX_Z)
index += get_global_size(0);
while (index < totalK) {
// Find the wave vector (kx, ky, kz) this index corresponds to.
int rx = index/(ksizey*ksizez);
int remainder = index - rx*ksizey*ksizez;
int ry = remainder/ksizez;
int rz = remainder - ry*ksizez - KMAX_Z + 1;
ry += -KMAX_Y + 1;
real kx = rx*reciprocalPeriodicBoxSize.x;
real ky = ry*reciprocalPeriodicBoxSize.y;
real kz = rz*reciprocalPeriodicBoxSize.z;
// Compute the sum for this wave vector.
real2 sum = 0.0f;
for (int atom = 0; atom < NUM_ATOMS; atom++) {
real4 apos = posq[atom];
real phase = apos.x*kx;
real2 structureFactor = (real2) (cos(phase), sin(phase));
phase = apos.y*ky;
structureFactor = multofReal2(structureFactor, (real2) (cos(phase), sin(phase)));
phase = apos.z*kz;
structureFactor = multofReal2(structureFactor, (real2) (cos(phase), sin(phase)));
sum += apos.w*structureFactor;
}
cosSinSum[index] = sum;
// Compute the contribution to the energy.
real k2 = kx*kx + ky*ky + kz*kz;
real ak = EXP(k2*EXP_COEFFICIENT) / k2;
energy += reciprocalCoefficient*ak*(sum.x*sum.x + sum.y*sum.y);
index += get_global_size(0);
}
energyBuffer[get_global_id(0)] += energy;
}
/**
* Compute the reciprocal space part of the Ewald force, using the precomputed sums from the
* previous routine.
*/
__kernel void calculateEwaldForces(__global real4* restrict forceBuffers, __global const real4* restrict posq, __global const real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
unsigned int atom = get_global_id(0);
while (atom < NUM_ATOMS) {
real4 force = forceBuffers[atom];
real4 apos = posq[atom];
// Loop over all wave vectors.
int lowry = 0;
int lowrz = 1;
for (int rx = 0; rx < KMAX_X; rx++) {
real kx = rx*reciprocalPeriodicBoxSize.x;
for (int ry = lowry; ry < KMAX_Y; ry++) {
real ky = ry*reciprocalPeriodicBoxSize.y;
real phase = apos.x*kx;
real2 tab_xy = (real2) (cos(phase), sin(phase));
phase = apos.y*ky;
tab_xy = multofReal2(tab_xy, (real2) (cos(phase), sin(phase)));
for (int rz = lowrz; rz < KMAX_Z; rz++) {
real kz = rz*reciprocalPeriodicBoxSize.z;
// Compute the force contribution of this wave vector.
int index = rx*(KMAX_Y*2-1)*(KMAX_Z*2-1) + (ry+KMAX_Y-1)*(KMAX_Z*2-1) + (rz+KMAX_Z-1);
real k2 = kx*kx + ky*ky + kz*kz;
real ak = EXP(k2*EXP_COEFFICIENT)/k2;
phase = apos.z*kz;
real2 structureFactor = multofReal2(tab_xy, (real2) (cos(phase), sin(phase)));
real2 sum = cosSinSum[index];
real dEdR = 2*reciprocalCoefficient*ak*apos.w*(sum.x*structureFactor.y - sum.y*structureFactor.x);
force.x += dEdR*kx;
force.y += dEdR*ky;
force.z += dEdR*kz;
lowrz = 1 - KMAX_Z;
}
lowry = 1 - KMAX_Y;
}
}
// Record the force on the atom.
forceBuffers[atom] = force;
atom += get_global_size(0);
}
}
/**
* Scale the particle positions with each axis independent.
*/
__kernel void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, __global real4* restrict posq,
__global const int* restrict moleculeAtoms, __global const int* restrict moleculeStartIndex) {
for (int index = get_global_id(0); index < numMolecules; index += get_global_size(0)) {
int first = moleculeStartIndex[index];
int last = moleculeStartIndex[index+1];
int numAtoms = last-first;
// Find the center of each molecule.
real3 center = (real3) 0;
for (int atom = first; atom < last; atom++)
center += posq[moleculeAtoms[atom]].xyz;
center /= (real) numAtoms;
// Move it into the first periodic box.
real3 oldCenter = center;
APPLY_PERIODIC_TO_POS(center)
real3 delta = oldCenter-center;;
real3 scaleXYZ = (real3) (scaleX, scaleY, scaleZ);
// Now scale the position of the molecule center.
delta = center*(scaleXYZ-1)-delta;
for (int atom = first; atom < last; atom++) {
real4 pos = posq[moleculeAtoms[atom]];
pos.xyz += delta.xyz;
posq[moleculeAtoms[atom]] = pos;
}
}
}
float4 exceptionParams = PARAMS[index];
real4 delta = pos2-pos1;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real sig2 = invR*exceptionParams.y;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real dEdR = exceptionParams.z*(12.0f*sig6-6.0f)*sig6;
real tempEnergy = exceptionParams.z*(sig6-1.0f)*sig6;
dEdR += exceptionParams.x*invR;
dEdR *= invR*invR;
tempEnergy += exceptionParams.x*invR;
energy += tempEnergy;
delta.xyz *= dEdR;
real4 force1 = -delta;
real4 force2 = delta;
/**
* Compute the nonbonded parameters for particles and exceptions.
*/
__kernel void computeParameters(__global mixed* restrict energyBuffer, int includeSelfEnergy, __global real* restrict globalParams,
int numAtoms, __global const float4* restrict baseParticleParams, __global real4* restrict posq, __global real* restrict charge,
__global float2* restrict sigmaEpsilon, __global float4* restrict particleParamOffsets, __global int* restrict particleOffsetIndices
#ifdef HAS_EXCEPTIONS
, int numExceptions, __global const float4* restrict baseExceptionParams, __global float4* restrict exceptionParams,
__global float4* restrict exceptionParamOffsets, __global int* restrict exceptionOffsetIndices
#endif
) {
mixed energy = 0;
// Compute particle parameters.
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
float4 params = baseParticleParams[i];
#ifdef HAS_OFFSETS
int start = particleOffsetIndices[i], end = particleOffsetIndices[i+1];
for (int j = start; j < end; j++) {
float4 offset = particleParamOffsets[j];
real value = globalParams[(int) offset.w];
params.x += value*offset.x;
params.y += value*offset.y;
params.z += value*offset.z;
}
#endif
#ifdef USE_POSQ_CHARGES
posq[i].w = params.x;
#else
charge[i] = params.x;
#endif
sigmaEpsilon[i] = (float2) (0.5f*params.y, 2*SQRT(params.z));
#ifdef HAS_OFFSETS
#ifdef INCLUDE_EWALD
energy -= EWALD_SELF_ENERGY_SCALE*params.x*params.x;
#endif
#ifdef INCLUDE_LJPME
real sig3 = params.y*params.y*params.y;
energy += LJPME_SELF_ENERGY_SCALE*sig3*sig3*params.z;
#endif
#endif
}
// Compute exception parameters.
#ifdef HAS_EXCEPTIONS
for (int i = get_global_id(0); i < numExceptions; i += get_global_size(0)) {
float4 params = baseExceptionParams[i];
#ifdef HAS_OFFSETS
int start = exceptionOffsetIndices[i], end = exceptionOffsetIndices[i+1];
for (int j = start; j < end; j++) {
float4 offset = exceptionParamOffsets[j];
real value = globalParams[(int) offset.w];
params.x += value*offset.x;
params.y += value*offset.y;
params.z += value*offset.z;
}
#endif
exceptionParams[i] = (float4) ((float) (ONE_4PI_EPS0*params.x), (float) params.y, (float) (4*params.z), 0);
}
#endif
if (includeSelfEnergy)
energyBuffer[get_global_id(0)] += energy;
}
/**
* Compute parameters for subtracting the reciprocal part of excluded interactions.
*/
__kernel void computeExclusionParameters(__global real4* restrict posq, __global real* restrict charge, __global float2* restrict sigmaEpsilon,
int numExclusions, __global const int2* restrict exclusionAtoms, __global float4* restrict exclusionParams) {
for (int i = get_global_id(0); i < numExclusions; i += get_global_size(0)) {
int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
#else
real chargeProd = charge[atoms.x]*charge[atoms.y];
#endif
#ifdef INCLUDE_LJPME
float2 sigEps1 = sigmaEpsilon[atoms.x];
float2 sigEps2 = sigmaEpsilon[atoms.y];
float sigma = sigEps1.x*sigEps2.x;
float epsilon = sigEps1.y*sigEps2.y;
#else
float sigma = 0;
float epsilon = 0;
#endif
exclusionParams[i] = (float4) ((float) (ONE_4PI_EPS0*chargeProd), sigma, epsilon, 0);
}
}
const float4 exclusionParams = PARAMS[index];
real3 delta = (real3) (pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#if USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
const real r = SQRT(r2);
const real invR = RECIP(r);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
real tempForce = 0.0f;
if (alphaR > 1e-6f) {
const real erfAlphaR = erf(alphaR);
const real prefactor = exclusionParams.x*invR;
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
energy -= prefactor*erfAlphaR;
}
else {
energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
}
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
energy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
if (r > 0)
delta *= tempForce*invR*invR;
real3 force1 = -delta;
real3 force2 = delta;
const real PI = 3.14159265358979323846f;
real4 v0 = (real4) (pos1.xyz-pos2.xyz, 0.0f);
real4 v1 = (real4) (pos3.xyz-pos2.xyz, 0.0f);
real4 v2 = (real4) (pos3.xyz-pos4.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
APPLY_PERIODIC_TO_DELTA(v2)
#endif
real4 cp0 = cross(v0, v1);
real4 cp1 = cross(v1, v2);
real cosangle = dot(normalize(cp0), normalize(cp1));
real theta;
if (cosangle > 0.99f || cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real4 cross_prod = cross(cp0, cp1);
real scale = dot(cp0, cp0)*dot(cp1, cp1);
theta = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0)
theta = PI-theta;
}
else
theta = acos(cosangle);
theta = (dot(v0, cp1) >= 0 ? theta : -theta);
COMPUTE_FORCE
real normCross1 = dot(cp0, cp0);
real normSqrBC = dot(v1, v1);
real normBC = SQRT(normSqrBC);
real normCross2 = dot(cp1, cp1);
real dp = 1.0f/normSqrBC;
real4 ff = (real4) ((-dEdAngle*normBC)/normCross1, dot(v0, v1)*dp, dot(v2, v1)*dp, (dEdAngle*normBC)/normCross2);
real4 force1 = ff.x*cp0;
real4 force4 = ff.w*cp1;
real4 s = ff.y*force1 - ff.z*force4;
real4 force2 = s-force1;
real4 force3 = -s-force4;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment