Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
fd473eea
Commit
fd473eea
authored
Oct 29, 2015
by
Peter Eastman
Browse files
Merge branch 'master' into nucleic
parents
0a751b5b
6a985cfd
Changes
279
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
756 additions
and
93 deletions
+756
-93
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+2
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+463
-12
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+84
-34
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+4
-0
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+1
-0
platforms/opencl/src/kernels/customCentroidBond.cl
platforms/opencl/src/kernels/customCentroidBond.cl
+141
-0
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
+2
-2
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+3
-3
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+2
-2
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+2
-2
platforms/opencl/src/kernels/ewald.cl
platforms/opencl/src/kernels/ewald.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+2
-2
platforms/opencl/src/kernels/gbsaObcReductions.cl
platforms/opencl/src/kernels/gbsaObcReductions.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+2
-2
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+24
-10
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+13
-13
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+2
-2
platforms/opencl/staticTarget/CMakeLists.txt
platforms/opencl/staticTarget/CMakeLists.txt
+1
-1
No files found.
Too many changes to show.
To preserve performance only
279 of 279+
files are displayed.
Plain diff
Email patch
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
fd473eea
...
@@ -102,6 +102,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
...
@@ -102,6 +102,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return
new
OpenCLCalcCustomExternalForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcCustomExternalForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomHbondForceKernel
::
Name
())
if
(
name
==
CalcCustomHbondForceKernel
::
Name
())
return
new
OpenCLCalcCustomHbondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcCustomHbondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomCentroidBondForceKernel
::
Name
())
return
new
OpenCLCalcCustomCentroidBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
OpenCLCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
fd473eea
...
@@ -31,6 +31,7 @@
...
@@ -31,6 +31,7 @@
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCentroidBondForceImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
...
@@ -127,7 +128,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
...
@@ -127,7 +128,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cl.getBondedUtilities().computeInteractions(groups);
cl.getBondedUtilities().computeInteractions(groups);
cl
.
getNonbondedUtilities
().
computeInteractions
(
groups
);
cl.getNonbondedUtilities().computeInteractions(groups
, includeForces, includeEnergy
);
double sum = 0.0;
double sum = 0.0;
for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter)
for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
...
@@ -135,7 +136,7 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
...
@@ -135,7 +136,7 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) {
if (includeEnergy) {
OpenCLArray& energyArray = cl.getEnergyBuffer();
OpenCLArray& energyArray = cl.getEnergyBuffer();
if
(
cl
.
getUseDoublePrecision
())
{
if (cl.getUseDoublePrecision()
|| cl.getUseMixedPrecision()
) {
double* energy = (double*) cl.getPinnedBuffer();
double* energy = (double*) cl.getPinnedBuffer();
energyArray.download(energy);
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
for (int i = 0; i < energyArray.getSize(); i++)
...
@@ -1551,8 +1552,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1551,8 +1552,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
else
cl.getPosq().upload(posqf);
cl.getPosq().upload(posqf);
sigmaEpsilon->upload(sigmaEpsilonVector);
sigmaEpsilon->upload(sigmaEpsilonVector);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
);
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
CutoffNonPeriodic
);
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
map<string, string> defines;
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
...
@@ -1580,7 +1582,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1580,7 +1582,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
dispersionCoefficient = 0.0;
dispersionCoefficient = 0.0;
alpha = 0;
alpha = 0;
ewaldSelfEnergy = 0.0;
ewaldSelfEnergy = 0.0;
if
(
force
.
getN
onbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
if (
n
onbondedMethod == Ewald) {
// Compute the Ewald parameters.
// Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz;
int kmaxx, kmaxy, kmaxz;
...
@@ -1606,10 +1608,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1606,10 +1608,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
}
}
else
if
(
force
.
getN
onbondedMethod
()
==
NonbondedForce
::
PME
)
{
else if (
n
onbondedMethod == PME) {
// Compute the PME parameters.
// Compute the PME parameters.
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
gridSizeX = OpenCLFFT3D::findLegalDimension(gridSizeX);
gridSizeY = OpenCLFFT3D::findLegalDimension(gridSizeY);
gridSizeY = OpenCLFFT3D::findLegalDimension(gridSizeY);
...
@@ -2056,14 +2057,26 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -2056,14 +2057,26 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Compute other values.
// Compute other values.
NonbondedForce
::
NonbondedMethod
method
=
force
.
getNonbondedMethod
();
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
if
(
method
==
NonbondedForce
::
Ewald
||
method
==
NonbondedForce
::
PME
)
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
ewaldSelfEnergy = (cl.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if
(
force
.
getUseDispersionCorrection
()
&&
cl
.
getContextIndex
()
==
0
&&
(
method
==
NonbondedForce
::
CutoffPeriodic
||
method
==
NonbondedForce
::
Ewald
||
method
==
NonbondedForce
::
PME
))
if (force.getUseDispersionCorrection() && cl.getContextIndex() == 0 && (
nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod ==
PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cl.invalidateMolecules();
cl.invalidateMolecules();
}
}
void OpenCLCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cl.getPlatformData().useCpuPme)
cpuPme.getAs<CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = this->alpha;
nx = gridSizeX;
ny = gridSizeY;
nz = gridSizeZ;
}
}
class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo {
class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo {
public:
public:
OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
...
@@ -3642,7 +3655,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -3642,7 +3655,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if (useLong) {
if (useLong) {
pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs->getDeviceBuffer());
pairEnergyKernel.setArg<cl::Memory>(index++, longEnergyDerivs->getDeviceBuffer());
for (int i = 0; i < numComputedValues; ++i)
for (int i = 0; i < numComputedValues; ++i)
pairEnergyKernel
.
setArg
(
index
++
,
nb
.
getForceThreadBlockSize
()
*
elementSize
,
NULL
);
pairEnergyKernel.setArg(index++,
(deviceIsCpu ? OpenCLContext::TileSize :
nb.getForceThreadBlockSize()
)
*elementSize, NULL);
}
}
else {
else {
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
for (int i = 0; i < (int) energyDerivs->getBuffers().size(); i++) {
...
@@ -3808,7 +3821,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
...
@@ -3808,7 +3821,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
globalParamValues[i] = (cl_float) force.getGlobalParameterDefaultValue(i);
}
}
Lepton
::
ParsedExpression
energyExpression
=
Lepton
::
Parser
::
parse
(
force
.
getEnergyFunction
()).
optimize
();
map<string, Lepton::CustomFunction*> customFunctions;
customFunctions["periodicdistance"] = cl.getExpressionUtilities().getPeriodicDistancePlaceholder();
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), customFunctions).optimize();
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x").optimize();
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x").optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y").optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y").optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z").optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z").optimize();
...
@@ -4433,6 +4448,442 @@ void OpenCLCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& cont
...
@@ -4433,6 +4448,442 @@ void OpenCLCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& cont
cl.invalidateMolecules();
cl.invalidateMolecules();
}
}
class OpenCLCustomCentroidBondForceInfo : public OpenCLForceInfo {
public:
OpenCLCustomCentroidBondForceInfo(const CustomCentroidBondForce& force) : OpenCLForceInfo(0), force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
}
void getParticlesInGroup(int index, vector<int>& particles) {
vector<double> parameters;
vector<int> groups;
force.getBondParameters(index, groups, parameters);
for (int i = 0; i < groups.size(); i++) {
vector<int> groupParticles;
vector<double> weights;
force.getGroupParameters(groups[i], groupParticles, weights);
particles.insert(particles.end(), groupParticles.begin(), groupParticles.end());
}
}
bool areGroupsIdentical(int group1, int group2) {
vector<int> groups1, groups2;
vector<double> parameters1, parameters2;
force.getBondParameters(group1, groups1, parameters1);
force.getBondParameters(group2, groups2, parameters2);
for (int i = 0; i < (int) parameters1.size(); i++)
if (parameters1[i] != parameters2[i])
return false;
for (int i = 0; i < groups1.size(); i++) {
vector<int> groupParticles;
vector<double> weights1, weights2;
force.getGroupParameters(groups1[i], groupParticles, weights1);
force.getGroupParameters(groups2[i], groupParticles, weights2);
if (weights1.size() != weights2.size())
return false;
for (int j = 0; j < weights1.size(); j++)
if (weights1[j] != weights2[j])
return false;
}
return true;
}
private:
const CustomCentroidBondForce& force;
};
OpenCLCalcCustomCentroidBondForceKernel::~OpenCLCalcCustomCentroidBondForceKernel() {
if (params != NULL)
delete params;
if (globals != NULL)
delete globals;
if (groupParticles != NULL)
delete groupParticles;
if (groupWeights != NULL)
delete groupWeights;
if (groupOffsets != NULL)
delete groupOffsets;
if (groupForces != NULL)
delete groupForces;
if (bondGroups != NULL)
delete bondGroups;
if (centerPositions != NULL)
delete centerPositions;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
}
void OpenCLCalcCustomCentroidBondForceKernel::initialize(const System& system, const CustomCentroidBondForce& force) {
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
if (!cl.getSupports64BitGlobalAtomics())
throw OpenMMException("CustomCentroidBondForce requires a device that supports 64 bit atomic operations");
cl.addForce(new OpenCLCustomCentroidBondForceInfo(force));
// Record the groups.
numGroups = force.getNumGroups();
vector<cl_int> groupParticleVec;
vector<cl_float> groupWeightVecFloat;
vector<cl_double> groupWeightVecDouble;
vector<cl_int> groupOffsetVec;
groupOffsetVec.push_back(0);
for (int i = 0; i < numGroups; i++) {
vector<int> particles;
vector<double> weights;
force.getGroupParameters(i, particles, weights);
groupParticleVec.insert(groupParticleVec.end(), particles.begin(), particles.end());
groupOffsetVec.push_back(groupParticleVec.size());
}
vector<vector<double> > normalizedWeights;
CustomCentroidBondForceImpl::computeNormalizedWeights(force, system, normalizedWeights);
if (cl.getUseDoublePrecision()) {
for (int i = 0; i < numGroups; i++)
groupWeightVecDouble.insert(groupWeightVecDouble.end(), normalizedWeights[i].begin(), normalizedWeights[i].end());
}
else {
for (int i = 0; i < numGroups; i++)
for (int j = 0; j < normalizedWeights[i].size(); j++)
groupWeightVecFloat.push_back((float) normalizedWeights[i][j]);
}
groupParticles = OpenCLArray::create<int>(cl, groupParticleVec.size(), "groupParticles");
groupParticles->upload(groupParticleVec);
if (cl.getUseDoublePrecision()) {
groupWeights = OpenCLArray::create<double>(cl, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecDouble);
centerPositions = OpenCLArray::create<mm_double4>(cl, numGroups, "centerPositions");
}
else {
groupWeights = OpenCLArray::create<float>(cl, groupParticleVec.size(), "groupWeights");
groupWeights->upload(groupWeightVecFloat);
centerPositions = OpenCLArray::create<mm_float4>(cl, numGroups, "centerPositions");
}
groupOffsets = OpenCLArray::create<int>(cl, groupOffsetVec.size(), "groupOffsets");
groupOffsets->upload(groupOffsetVec);
groupForces = OpenCLArray::create<long long>(cl, numGroups*3, "groupForces");
cl.addAutoclearBuffer(*groupForces);
// Record the bonds.
int groupsPerBond = force.getNumGroupsPerBond();
vector<cl_int> bondGroupVec(numBonds*groupsPerBond);
params = new OpenCLParameterSet(cl, force.getNumPerBondParameters(), numBonds, "customCentroidBondParams");
vector<vector<float> > paramVector(numBonds);
for (int i = 0; i < numBonds; i++) {
vector<int> groups;
vector<double> parameters;
force.getBondParameters(i, groups, parameters);
for (int j = 0; j < groups.size(); j++)
bondGroupVec[i+j*numBonds] = groups[j];
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
bondGroups = OpenCLArray::create<int>(cl, bondGroupVec.size(), "bondGroups");
bondGroups->upload(bondGroupVec);
// Record the tabulated functions.
map<string, Lepton::CustomFunction*> functions;
vector<pair<string, string> > functionDefinitions;
vector<const TabulatedFunction*> functionList;
stringstream extraArgs;
for (int i = 0; i < force.getNumTabulatedFunctions(); i++) {
functionList.push_back(&force.getTabulatedFunction(i));
string name = force.getTabulatedFunctionName(i);
string arrayName = "table"+cl.intToString(i);
functionDefinitions.push_back(make_pair(name, arrayName));
functions[name] = cl.getExpressionUtilities().getFunctionPlaceholder(force.getTabulatedFunction(i));
int width;
vector<float> f = cl.getExpressionUtilities().computeFunctionCoefficients(force.getTabulatedFunction(i), width);
tabulatedFunctions.push_back(OpenCLArray::create<float>(cl, f.size(), "TabulatedFunction"));
tabulatedFunctions.back()->upload(f);
extraArgs << ", __global const float";
if (width > 1)
extraArgs << width;
extraArgs << "* restrict " << arrayName;
}
// Record information about parameters.
globalParamNames.resize(force.getNumGlobalParameters());
globalParamValues.resize(force.getNumGlobalParameters());
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
globalParamNames[i] = force.getGlobalParameterName(i);
globalParamValues[i] = (float) force.getGlobalParameterDefaultValue(i);
}
map<string, string> variables;
for (int i = 0; i < groupsPerBond; i++) {
string index = cl.intToString(i+1);
variables["x"+index] = "pos"+index+".x";
variables["y"+index] = "pos"+index+".y";
variables["z"+index] = "pos"+index+".z";
}
for (int i = 0; i < force.getNumPerBondParameters(); i++) {
const string& name = force.getPerBondParameterName(i);
variables[name] = "bondParams"+params->getParameterSuffix(i);
}
if (force.getNumGlobalParameters() > 0) {
globals = OpenCLArray::create<float>(cl, force.getNumGlobalParameters(), "customCentroidBondGlobals");
globals->upload(globalParamValues);
extraArgs << ", __global const float* restrict globals";
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
const string& name = force.getGlobalParameterName(i);
string value = "globals["+cl.intToString(i)+"]";
variables[name] = value;
}
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// and dihedrals the expression depends on.
map<string, vector<int> > distances;
map<string, vector<int> > angles;
map<string, vector<int> > dihedrals;
Lepton::ParsedExpression energyExpression = CustomCentroidBondForceImpl::prepareExpression(force, functions, distances, angles, dihedrals);
map<string, Lepton::ParsedExpression> forceExpressions;
set<string> computedDeltas;
vector<string> atomNames, posNames;
for (int i = 0; i < groupsPerBond; i++) {
string index = cl.intToString(i+1);
atomNames.push_back("P"+index);
posNames.push_back("pos"+index);
}
stringstream compute;
for (int i = 0; i < groupsPerBond; i++) {
compute<<"int group"<<(i+1)<<" = bondGroups[index+"<<(i*numBonds)<<"];\n";
compute<<"real4 pos"<<(i+1)<<" = centerPositions[group"<<(i+1)<<"];\n";
}
int index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName = atomNames[groups[0]]+atomNames[groups[1]];
if (computedDeltas.count(deltaName) == 0) {
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName);
}
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
variables[iter->first] = "r_"+deltaName;
forceExpressions["real dEdDistance"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[1]]+atomNames[groups[0]];
string deltaName2 = atomNames[groups[1]]+atomNames[groups[2]];
string angleName = "angle_"+atomNames[groups[0]]+atomNames[groups[1]]+atomNames[groups[2]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[0]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[2]]<<");\n";
computedDeltas.insert(deltaName2);
}
compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
variables[iter->first] = angleName;
forceExpressions["real dEdAngle"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[0]]+atomNames[groups[1]];
string deltaName2 = atomNames[groups[2]]+atomNames[groups[1]];
string deltaName3 = atomNames[groups[2]]+atomNames[groups[3]];
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[groups[0]]+atomNames[groups[1]]+atomNames[groups[2]]+atomNames[groups[3]];
if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName1);
}
if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[1]]<<");\n";
computedDeltas.insert(deltaName2);
}
if (computedDeltas.count(deltaName3) == 0) {
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[3]]<<");\n";
computedDeltas.insert(deltaName3);
}
compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
compute<<"real4 "<<crossName2<<" = computeCross(delta"<<deltaName2<<", delta"<<deltaName3<<");\n";
compute<<"real "<<dihedralName<<" = computeAngle("<<crossName1<<", "<<crossName2<<");\n";
compute<<dihedralName<<" *= (delta"<<deltaName1<<".x*"<<crossName2<<".x + delta"<<deltaName1<<".y*"<<crossName2<<".y + delta"<<deltaName1<<".z*"<<crossName2<<".z < 0 ? -1 : 1);\n";
variables[iter->first] = dihedralName;
forceExpressions["real dEdDihedral"+cl.intToString(index)+" = "] = energyExpression.differentiate(iter->first).optimize();
}
// Now evaluate the expressions.
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
extraArgs<<", __global const "<<buffer.getType()<<"* restrict globalParams"<<i;
compute<<buffer.getType()<<" bondParams"<<(i+1)<<" = globalParams"<<i<<"[index];\n";
}
forceExpressions["energy += "] = energyExpression;
compute << cl.getExpressionUtilities().createExpressions(forceExpressions, variables, functionList, functionDefinitions, "temp");
// Finally, apply forces to groups.
vector<string> forceNames;
for (int i = 0; i < groupsPerBond; i++) {
string istr = cl.intToString(i+1);
string forceName = "force"+istr;
forceNames.push_back(forceName);
compute<<"real3 "<<forceName<<" = (real3) 0;\n";
compute<<"{\n";
Lepton::ParsedExpression forceExpressionX = energyExpression.differentiate("x"+istr).optimize();
Lepton::ParsedExpression forceExpressionY = energyExpression.differentiate("y"+istr).optimize();
Lepton::ParsedExpression forceExpressionZ = energyExpression.differentiate("z"+istr).optimize();
map<string, Lepton::ParsedExpression> expressions;
if (!isZeroExpression(forceExpressionX))
expressions[forceName+".x -= "] = forceExpressionX;
if (!isZeroExpression(forceExpressionY))
expressions[forceName+".y -= "] = forceExpressionY;
if (!isZeroExpression(forceExpressionZ))
expressions[forceName+".z -= "] = forceExpressionZ;
if (expressions.size() > 0)
compute<<cl.getExpressionUtilities().createExpressions(expressions, variables, functionList, functionDefinitions, "coordtemp");
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = distances.begin(); iter != distances.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName = atomNames[groups[0]]+atomNames[groups[1]];
string value = "(dEdDistance"+cl.intToString(index)+"/r_"+deltaName+")*delta"+deltaName+".xyz";
compute<<forceNames[groups[0]]<<" += "<<"-"<<value<<";\n";
compute<<forceNames[groups[1]]<<" += "<<value<<";\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = angles.begin(); iter != angles.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[1]]+atomNames[groups[0]];
string deltaName2 = atomNames[groups[1]]+atomNames[groups[2]];
compute<<"{\n";
compute<<"real4 crossProd = cross(delta"<<deltaName2<<", delta"<<deltaName1<<");\n";
compute<<"real lengthCross = max(length(crossProd), (real) 1e-6f);\n";
compute<<"real4 deltaCross0 = -cross(delta"<<deltaName1<<", crossProd)*dEdAngle"<<cl.intToString(index)<<"/(delta"<<deltaName1<<".w*lengthCross);\n";
compute<<"real4 deltaCross2 = cross(delta"<<deltaName2<<", crossProd)*dEdAngle"<<cl.intToString(index)<<"/(delta"<<deltaName2<<".w*lengthCross);\n";
compute<<"real4 deltaCross1 = -(deltaCross0+deltaCross2);\n";
compute<<forceNames[groups[0]]<<".xyz += deltaCross0.xyz;\n";
compute<<forceNames[groups[1]]<<".xyz += deltaCross1.xyz;\n";
compute<<forceNames[groups[2]]<<".xyz += deltaCross2.xyz;\n";
compute<<"}\n";
}
index = 0;
for (map<string, vector<int> >::const_iterator iter = dihedrals.begin(); iter != dihedrals.end(); ++iter, ++index) {
const vector<int>& groups = iter->second;
string deltaName1 = atomNames[groups[0]]+atomNames[groups[1]];
string deltaName2 = atomNames[groups[2]]+atomNames[groups[1]];
string deltaName3 = atomNames[groups[2]]+atomNames[groups[3]];
string crossName1 = "cross_"+deltaName1+"_"+deltaName2;
string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
compute<<"{\n";
compute<<"real r = sqrt(delta"<<deltaName2<<".w);\n";
compute<<"real4 ff;\n";
compute<<"ff.x = (-dEdDihedral"<<cl.intToString(index)<<"*r)/"<<crossName1<<".w;\n";
compute<<"ff.y = (delta"<<deltaName1<<".x*delta"<<deltaName2<<".x + delta"<<deltaName1<<".y*delta"<<deltaName2<<".y + delta"<<deltaName1<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.z = (delta"<<deltaName3<<".x*delta"<<deltaName2<<".x + delta"<<deltaName3<<".y*delta"<<deltaName2<<".y + delta"<<deltaName3<<".z*delta"<<deltaName2<<".z)/delta"<<deltaName2<<".w;\n";
compute<<"ff.w = (dEdDihedral"<<cl.intToString(index)<<"*r)/"<<crossName2<<".w;\n";
compute<<"real4 internalF0 = ff.x*"<<crossName1<<";\n";
compute<<"real4 internalF3 = ff.w*"<<crossName2<<";\n";
compute<<"real4 s = ff.y*internalF0 - ff.z*internalF3;\n";
compute<<forceNames[groups[0]]<<".xyz += internalF0.xyz;\n";
compute<<forceNames[groups[1]]<<".xyz += s.xyz-internalF0.xyz;\n";
compute<<forceNames[groups[2]]<<".xyz += -s.xyz-internalF3.xyz;\n";
compute<<forceNames[groups[3]]<<".xyz += internalF3.xyz;\n";
compute<<"}\n";
}
// Save the forces to global memory.
for (int i = 0; i < groupsPerBond; i++) {
compute<<"atom_add(&groupForce[group"<<(i+1)<<"], (long) (force"<<(i+1)<<".x*0x100000000));\n";
compute<<"atom_add(&groupForce[group"<<(i+1)<<"+NUM_GROUPS], (long) (force"<<(i+1)<<".y*0x100000000));\n";
compute<<"atom_add(&groupForce[group"<<(i+1)<<"+NUM_GROUPS*2], (long) (force"<<(i+1)<<".z*0x100000000));\n";
}
map<string, string> replacements;
replacements["M_PI"] = cl.doubleToString(M_PI);
replacements["NUM_GROUPS"] = cl.intToString(numGroups);
replacements["NUM_BONDS"] = cl.intToString(numBonds);
replacements["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
replacements["EXTRA_ARGS"] = extraArgs.str();
replacements["COMPUTE_FORCE"] = compute.str();
cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customCentroidBond, replacements));
index = 0;
computeCentersKernel = cl::Kernel(program, "computeGroupCenters");
computeCentersKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupParticles->getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupWeights->getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, groupOffsets->getDeviceBuffer());
computeCentersKernel.setArg<cl::Buffer>(index++, centerPositions->getDeviceBuffer());
index = 0;
groupForcesKernel = cl::Kernel(program, "computeGroupForces");
groupForcesKernel.setArg<cl::Buffer>(index++, groupForces->getDeviceBuffer());
index++; // Energy buffer hasn't been created yet
groupForcesKernel.setArg<cl::Buffer>(index++, centerPositions->getDeviceBuffer());
groupForcesKernel.setArg<cl::Buffer>(index++, bondGroups->getDeviceBuffer());
for (int i = 0; i < tabulatedFunctions.size(); i++)
groupForcesKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
if (globals != NULL)
groupForcesKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) params->getBuffers().size(); i++)
groupForcesKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
index = 0;
applyForcesKernel = cl::Kernel(program, "applyForcesToAtoms");
applyForcesKernel.setArg<cl::Buffer>(index++, groupParticles->getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupWeights->getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupOffsets->getDeviceBuffer());
applyForcesKernel.setArg<cl::Buffer>(index++, groupForces->getDeviceBuffer());
}
double OpenCLCalcCustomCentroidBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (numBonds == 0)
return 0.0;
if (globals != NULL) {
bool changed = false;
for (int i = 0; i < (int) globalParamNames.size(); i++) {
float value = (float) context.getParameter(globalParamNames[i]);
if (value != globalParamValues[i])
changed = true;
globalParamValues[i] = value;
}
if (changed)
globals->upload(globalParamValues);
}
cl.executeKernel(computeCentersKernel, OpenCLContext::TileSize*numGroups);
groupForcesKernel.setArg<cl::Buffer>(1, cl.getEnergyBuffer().getDeviceBuffer());
cl.executeKernel(groupForcesKernel, numBonds);
applyForcesKernel.setArg<cl::Buffer>(4, cl.getLongForceBuffer().getDeviceBuffer());
cl.executeKernel(applyForcesKernel, OpenCLContext::TileSize*numGroups);
return 0.0;
}
void OpenCLCalcCustomCentroidBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force) {
if (numBonds != force.getNumBonds())
throw OpenMMException("updateParametersInContext: The number of bonds has changed");
if (numBonds == 0)
return;
// Record the per-bond parameters.
vector<vector<float> > paramVector(numBonds);
vector<int> particles;
vector<double> parameters;
for (int i = 0; i < numBonds; i++) {
force.getBondParameters(i, particles, parameters);
paramVector[i].resize(parameters.size());
for (int j = 0; j < (int) parameters.size(); j++)
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
// Mark that the current reordering may be invalid.
cl.invalidateMolecules();
}
class OpenCLCustomCompoundBondForceInfo : public OpenCLForceInfo {
class OpenCLCustomCompoundBondForceInfo : public OpenCLForceInfo {
public:
public:
OpenCLCustomCompoundBondForceInfo(const CustomCompoundBondForce& force) : OpenCLForceInfo(0), force(force) {
OpenCLCustomCompoundBondForceInfo(const CustomCompoundBondForce& force) : OpenCLForceInfo(0), force(force) {
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
fd473eea
...
@@ -180,13 +180,29 @@ void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exc
...
@@ -180,13 +180,29 @@ void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exc
}
}
static
bool
compareUshort2
(
mm_ushort2
a
,
mm_ushort2
b
)
{
static
bool
compareUshort2
(
mm_ushort2
a
,
mm_ushort2
b
)
{
// This version is used on devices with SIMD width of 32 or less. It sorts tiles to improve cache efficiency.
return
((
a
.
y
<
b
.
y
)
||
(
a
.
y
==
b
.
y
&&
a
.
x
<
b
.
x
));
}
static
bool
compareUshort2LargeSIMD
(
mm_ushort2
a
,
mm_ushort2
b
)
{
// This version is used on devices with SIMD width greater than 32. It puts diagonal tiles before off-diagonal
// ones to reduce thread divergence.
if
(
a
.
x
==
a
.
y
)
{
if
(
b
.
x
==
b
.
y
)
return
(
a
.
x
<
b
.
x
);
return
true
;
}
if
(
b
.
x
==
b
.
y
)
return
false
;
return
((
a
.
y
<
b
.
y
)
||
(
a
.
y
==
b
.
y
&&
a
.
x
<
b
.
x
));
return
((
a
.
y
<
b
.
y
)
||
(
a
.
y
==
b
.
y
&&
a
.
x
<
b
.
x
));
}
}
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
if
(
atomExclusions
.
size
()
==
0
)
{
if
(
atomExclusions
.
size
()
==
0
)
{
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
atomExclusions
.
resize
(
context
.
getNumAtoms
());
atomExclusions
.
resize
(
context
.
getNumAtoms
());
for
(
int
i
=
0
;
i
<
(
int
)
atomExclusions
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
atomExclusions
.
size
();
i
++
)
atomExclusions
[
i
].
push_back
(
i
);
atomExclusions
[
i
].
push_back
(
i
);
...
@@ -199,7 +215,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -199,7 +215,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
setAtomBlockRange
(
context
.
getContextIndex
()
/
(
double
)
numContexts
,
(
context
.
getContextIndex
()
+
1
)
/
(
double
)
numContexts
);
setAtomBlockRange
(
context
.
getContextIndex
()
/
(
double
)
numContexts
,
(
context
.
getContextIndex
()
+
1
)
/
(
double
)
numContexts
);
// Build a list of tiles that contain exclusions.
// Build a list of tiles that contain exclusions.
set
<
pair
<
int
,
int
>
>
tilesWithExclusions
;
set
<
pair
<
int
,
int
>
>
tilesWithExclusions
;
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
...
@@ -212,7 +228,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -212,7 +228,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
vector
<
mm_ushort2
>
exclusionTilesVec
;
vector
<
mm_ushort2
>
exclusionTilesVec
;
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
exclusionTilesVec
.
push_back
(
mm_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
exclusionTilesVec
.
push_back
(
mm_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
co
mpareUshort2
);
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
co
ntext
.
getSIMDWidth
()
<=
32
?
compareUshort2
:
compareUshort2LargeSIMD
);
exclusionTiles
=
OpenCLArray
::
create
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
=
OpenCLArray
::
create
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
...
@@ -341,37 +357,43 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
...
@@ -341,37 +357,43 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
bool
rebuild
=
false
;
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
do
{
blockSorter
->
sort
(
*
sortedBlocks
);
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
forceRebuildNeighborList
=
false
;
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
lastCutoff
=
kernels
.
cutoffDistance
;
}
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
)
{
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
kernels
.
hasForces
)
{
if
(
kernels
.
hasForces
)
{
cl
::
Kernel
&
kernel
=
(
includeForces
?
(
includeEnergy
?
kernels
.
forceEnergyKernel
:
kernels
.
forceKernel
)
:
kernels
.
energyKernel
);
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernel
)
==
NULL
)
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
if
(
useCutoff
)
if
(
useCutoff
)
setPeriodicBoxArgs
(
context
,
kernels
.
forceKernel
,
9
);
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
context
.
executeKernel
(
kernels
.
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
if
(
context
.
getComputeForceCount
()
==
1
)
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
}
}
}
void
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
if
(
!
useCutoff
)
return
;
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
interactionCount
->
download
(
pinnedInteractionCount
);
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
return
;
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
// this from happening in the future.
...
@@ -387,14 +409,28 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
...
@@ -387,14 +409,28 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
KernelSet
&
kernels
=
iter
->
second
;
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
}
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
return
true
;
}
}
void
OpenCLNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
void
OpenCLNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
@@ -410,12 +446,23 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
...
@@ -410,12 +446,23 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
if
(
useCutoff
)
{
if
(
useCutoff
)
{
// We are using a cutoff, and the kernels have already been created.
// We are using a cutoff, and the kernels have already been created.
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
KernelSet
&
kernels
=
iter
->
second
;
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
}
}
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
}
}
...
@@ -433,8 +480,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
...
@@ -433,8 +480,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
}
}
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
cutoffDistance
=
cutoff
;
kernels
.
cutoffDistance
=
cutoff
;
if
(
kernels
.
hasForces
)
kernels
.
source
=
source
;
kernels
.
forceKernel
=
createInteractionKernel
(
source
,
parameters
,
arguments
,
true
,
true
,
groups
);
if
(
useCutoff
)
{
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
double
paddedCutoff
=
cutoff
+
padding
;
...
@@ -491,7 +537,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
...
@@ -491,7 +537,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
// The device can't handle this block size, so reduce it.
groupSize
-=
32
;
groupSize
-=
32
;
if
(
groupSize
<
32
)
if
(
groupSize
<
32
)
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
...
@@ -504,7 +550,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
...
@@ -504,7 +550,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels
[
groups
]
=
kernels
;
groupKernels
[
groups
]
=
kernels
;
}
}
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
)
{
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
)
{
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
@@ -603,6 +649,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -603,6 +649,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useCutoff
&&
context
.
getSIMDWidth
()
<
32
)
if
(
useCutoff
&&
context
.
getSIMDWidth
()
<
32
)
defines
[
"PRUNE_BY_CUTOFF"
]
=
"1"
;
defines
[
"PRUNE_BY_CUTOFF"
]
=
"1"
;
if
(
includeForces
)
defines
[
"INCLUDE_FORCES"
]
=
"1"
;
if
(
includeEnergy
)
defines
[
"INCLUDE_ENERGY"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
double
maxCutoff
=
0.0
;
double
maxCutoff
=
0.0
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
fd473eea
...
@@ -579,6 +579,10 @@ void OpenCLParallelCalcNonbondedForceKernel::copyParametersToContext(ContextImpl
...
@@ -579,6 +579,10 @@ void OpenCLParallelCalcNonbondedForceKernel::copyParametersToContext(ContextImpl
getKernel
(
i
).
copyParametersToContext
(
context
,
force
);
getKernel
(
i
).
copyParametersToContext
(
context
,
force
);
}
}
void
OpenCLParallelCalcNonbondedForceKernel
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
OpenCLCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
class
OpenCLParallelCalcCustomNonbondedForceKernel
::
Task
:
public
OpenCLContext
::
WorkTask
{
class
OpenCLParallelCalcCustomNonbondedForceKernel
::
Task
:
public
OpenCLContext
::
WorkTask
{
public:
public:
Task
(
ContextImpl
&
context
,
OpenCLCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
Task
(
ContextImpl
&
context
,
OpenCLCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
fd473eea
...
@@ -74,6 +74,7 @@ OpenCLPlatform::OpenCLPlatform() {
...
@@ -74,6 +74,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory
(
CalcCustomGBForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomGBForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomExternalForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomExternalForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
...
...
platforms/opencl/src/kernels/customCentroidBond.cl
0 → 100644
View file @
fd473eea
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
/**
*
Compute
the
center
of
each
group.
*/
__kernel
void
computeGroupCenters
(
__global
const
real4*
restrict
posq,
__global
const
int*
restrict
groupParticles,
__global
const
real*
restrict
groupWeights,
__global
const
int*
restrict
groupOffsets,
__global
real4*
restrict
centerPositions
)
{
__local
volatile
real3
temp[64]
;
for
(
int
group
=
get_group_id
(
0
)
; group < NUM_GROUPS; group += get_num_groups(0)) {
//
The
threads
in
this
block
work
together
to
compute
the
center
one
group.
int
firstIndex
=
groupOffsets[group]
;
int
lastIndex
=
groupOffsets[group+1]
;
real3
center
=
(
real3
)
0
;
for
(
int
index
=
get_local_id
(
0
)
; index < lastIndex-firstIndex; index += get_local_size(0)) {
int
atom
=
groupParticles[firstIndex+index]
;
real
weight
=
groupWeights[firstIndex+index]
;
real4
pos
=
posq[atom]
;
center.x
+=
weight*pos.x
;
center.y
+=
weight*pos.y
;
center.z
+=
weight*pos.z
;
}
//
Sum
the
values.
int
thread
=
get_local_id
(
0
)
;
temp[thread].x
=
center.x
;
temp[thread].y
=
center.y
;
temp[thread].z
=
center.z
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
thread
<
32
)
{
temp[thread].x
+=
temp[thread+32].x
;
temp[thread].y
+=
temp[thread+32].y
;
temp[thread].z
+=
temp[thread+32].z
;
}
SYNC_WARPS
;
if
(
thread
<
16
)
{
temp[thread].x
+=
temp[thread+16].x
;
temp[thread].y
+=
temp[thread+16].y
;
temp[thread].z
+=
temp[thread+16].z
;
}
SYNC_WARPS
;
if
(
thread
<
8
)
{
temp[thread].x
+=
temp[thread+8].x
;
temp[thread].y
+=
temp[thread+8].y
;
temp[thread].z
+=
temp[thread+8].z
;
}
SYNC_WARPS
;
if
(
thread
<
4
)
{
temp[thread].x
+=
temp[thread+4].x
;
temp[thread].y
+=
temp[thread+4].y
;
temp[thread].z
+=
temp[thread+4].z
;
}
SYNC_WARPS
;
if
(
thread
<
2
)
{
temp[thread].x
+=
temp[thread+2].x
;
temp[thread].y
+=
temp[thread+2].y
;
temp[thread].z
+=
temp[thread+2].z
;
}
SYNC_WARPS
;
if
(
thread
==
0
)
centerPositions[group]
=
(
real4
)
(
temp[0].x+temp[1].x,
temp[0].y+temp[1].y,
temp[0].z+temp[1].z,
0
)
;
}
}
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
real4
delta
(
real4
vec1,
real4
vec2
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
/**
*
Compute
the
angle
between
two
vectors.
The
w
component
of
each
vector
should
contain
the
squared
magnitude.
*/
real
computeAngle
(
real4
vec1,
real4
vec2
)
{
real
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
real
cosine
=
dotProduct*RSQRT
(
vec1.w*vec2.w
)
;
real
angle
;
if
(
cosine
>
0.99f
||
cosine
<
-0.99f
)
{
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
real4
crossProduct
=
cross
(
vec1,
vec2
)
;
real
scale
=
vec1.w*vec2.w
;
angle
=
asin
(
SQRT
(
dot
(
crossProduct,
crossProduct
)
/scale
))
;
if
(
cosine
<
0
)
angle
=
M_PI-angle
;
}
else
angle
=
acos
(
cosine
)
;
return
angle
;
}
/**
*
Compute
the
cross
product
of
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
real4
computeCross
(
real4
vec1,
real4
vec2
)
{
real4
result
=
cross
(
vec1,
vec2
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
/**
*
Compute
the
forces
on
groups
based
on
the
bonds.
*/
__kernel
void
computeGroupForces
(
__global
long*
restrict
groupForce,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
centerPositions,
__global
const
int*
restrict
bondGroups
EXTRA_ARGS
)
{
mixed
energy
=
0
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
COMPUTE_FORCE
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
/**
*
Apply
the
forces
from
the
group
centers
to
the
individual
atoms.
*/
__kernel
void
applyForcesToAtoms
(
__global
const
int*
restrict
groupParticles,
__global
const
real*
restrict
groupWeights,
__global
const
int*
restrict
groupOffsets,
__global
const
long*
restrict
groupForce,
__global
long*
restrict
atomForce
)
{
for
(
int
group
=
get_group_id
(
0
)
; group < NUM_GROUPS; group += get_num_groups(0)) {
long
fx
=
groupForce[group]
;
long
fy
=
groupForce[group+NUM_GROUPS]
;
long
fz
=
groupForce[group+NUM_GROUPS*2]
;
int
firstIndex
=
groupOffsets[group]
;
int
lastIndex
=
groupOffsets[group+1]
;
for
(
int
index
=
get_local_id
(
0
)
; index < lastIndex-firstIndex; index += get_local_size(0)) {
int
atom
=
groupParticles[firstIndex+index]
;
real
weight
=
groupWeights[firstIndex+index]
;
atom_add
(
&atomForce[atom],
(
long
)
(
fx*weight
))
;
atom_add
(
&atomForce[atom+PADDED_NUM_ATOMS],
(
long
)
(
fy*weight
))
;
atom_add
(
&atomForce[atom+2*PADDED_NUM_ATOMS],
(
long
)
(
fz*weight
))
;
}
}
}
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
fd473eea
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
mixed
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -31,7 +31,7 @@ __kernel void computeN2Energy(
...
@@ -31,7 +31,7 @@ __kernel void computeN2Energy(
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
fd473eea
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
mixed
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -27,7 +27,7 @@ __kernel void computeN2Energy(
...
@@ -27,7 +27,7 @@ __kernel void computeN2Energy(
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
View file @
fd473eea
...
@@ -9,9 +9,9 @@
...
@@ -9,9 +9,9 @@
*
Reduce
the
derivatives
computed
in
the
N^2
energy
kernel,
and
compute
all
per-particle
energy
terms.
*
Reduce
the
derivatives
computed
in
the
N^2
energy
kernel,
and
compute
all
per-particle
energy
terms.
*/
*/
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
//
Reduce
the
derivatives
//
Reduce
the
derivatives
...
...
platforms/opencl/src/kernels/customHbondForce.cl
View file @
fd473eea
...
@@ -53,11 +53,11 @@ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -53,11 +53,11 @@ real4 computeCross(real4 vec1, real4 vec2) {
/**
/**
* Compute forces on donors.
* Compute forces on donors.
*/
*/
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
PARAMETER_ARGUMENTS) {
real
energy = 0;
mixed
energy = 0;
real4 f1 = (real4) 0;
real4 f1 = (real4) 0;
real4 f2 = (real4) 0;
real4 f2 = (real4) 0;
real4 f3 = (real4) 0;
real4 f3 = (real4) 0;
...
@@ -142,7 +142,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -142,7 +142,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
/**
/**
* Compute forces on acceptors.
* Compute forces on acceptors.
*/
*/
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
PARAMETER_ARGUMENTS) {
...
...
platforms/opencl/src/kernels/customManyParticle.cl
View file @
fd473eea
...
@@ -72,7 +72,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
...
@@ -72,7 +72,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
*
Compute
the
interaction.
*
Compute
the
interaction.
*/
*/
__kernel
void
computeInteraction
(
__kernel
void
computeInteraction
(
__global
long*
restrict
forceBuffers,
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
long*
restrict
forceBuffers,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
...
@@ -84,7 +84,7 @@ __kernel void computeInteraction(
...
@@ -84,7 +84,7 @@ __kernel void computeInteraction(
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
.0f
;
mixed
energy
=
0
;
//
Loop
over
particles
to
be
the
first
one
in
the
set.
//
Loop
over
particles
to
be
the
first
one
in
the
set.
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
fd473eea
...
@@ -42,14 +42,14 @@ __kernel void computeInteractionGroups(
...
@@ -42,14 +42,14 @@ __kernel void computeInteractionGroups(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
; // index within the warp
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
; // index within the warp
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
; // block warpIndex
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
; // block warpIndex
real
energy
=
0
.0f
;
mixed
energy
=
0
;
__local
AtomData
localData[LOCAL_MEMORY_SIZE]
;
__local
AtomData
localData[LOCAL_MEMORY_SIZE]
;
const
unsigned
int
startTile
=
FIRST_TILE+warp*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
;
const
unsigned
int
startTile
=
FIRST_TILE+warp*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
;
...
...
platforms/opencl/src/kernels/ewald.cl
View file @
fd473eea
...
@@ -6,13 +6,13 @@ real2 multofReal2(real2 a, real2 b) {
...
@@ -6,13 +6,13 @@ real2 multofReal2(real2 a, real2 b) {
*
Precompute
the
cosine
and
sine
sums
which
appear
in
each
force
term.
*
Precompute
the
cosine
and
sine
sums
which
appear
in
each
force
term.
*/
*/
__kernel
void
calculateEwaldCosSinSums
(
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
real2*
restrict
cosSinSum,
real4
reciprocalPeriodicBoxSize,
real
reciprocalCoefficient
)
{
__kernel
void
calculateEwaldCosSinSums
(
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
real2*
restrict
cosSinSum,
real4
reciprocalPeriodicBoxSize,
real
reciprocalCoefficient
)
{
const
unsigned
int
ksizex
=
2*KMAX_X-1
;
const
unsigned
int
ksizex
=
2*KMAX_X-1
;
const
unsigned
int
ksizey
=
2*KMAX_Y-1
;
const
unsigned
int
ksizey
=
2*KMAX_Y-1
;
const
unsigned
int
ksizez
=
2*KMAX_Z-1
;
const
unsigned
int
ksizez
=
2*KMAX_Z-1
;
const
unsigned
int
totalK
=
ksizex*ksizey*ksizez
;
const
unsigned
int
totalK
=
ksizex*ksizey*ksizez
;
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
real
energy
=
0
.0f
;
mixed
energy
=
0
;
while
(
index
<
(
KMAX_Y-1
)
*ksizez+KMAX_Z
)
while
(
index
<
(
KMAX_Y-1
)
*ksizez+KMAX_Z
)
index
+=
get_global_size
(
0
)
;
index
+=
get_global_size
(
0
)
;
while
(
index
<
totalK
)
{
while
(
index
<
totalK
)
{
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
fd473eea
...
@@ -387,7 +387,7 @@ __kernel void computeGBSAForce1(
...
@@ -387,7 +387,7 @@ __kernel void computeGBSAForce1(
#else
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
#endif
__global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...
@@ -400,7 +400,7 @@ __kernel void computeGBSAForce1(
...
@@ -400,7 +400,7 @@ __kernel void computeGBSAForce1(
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tbx = get_local_id(0) - tgx;
const unsigned int tbx = get_local_id(0) - tgx;
real
energy = 0
.0f
;
mixed
energy = 0;
__local AtomData2 localData[FORCE_WORK_GROUP_SIZE];
__local AtomData2 localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
platforms/opencl/src/kernels/gbsaObcReductions.cl
View file @
fd473eea
...
@@ -50,8 +50,8 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
...
@@ -50,8 +50,8 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
const
long*
restrict
bornForceIn,
__global
const
long*
restrict
bornForceIn,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
float2*
restrict
params,
__global
const
real*
restrict
bornRadii,
__global
const
real*
restrict
obcChain
)
{
__global
mixed
*
restrict
energyBuffer,
__global
const
float2*
restrict
params,
__global
const
real*
restrict
bornRadii,
__global
const
real*
restrict
obcChain
)
{
real
energy
=
0
.0f
;
mixed
energy
=
0
;
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
NUM_ATOMS
)
{
//
Sum
the
Born
force
//
Sum
the
Born
force
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
fd473eea
...
@@ -409,7 +409,7 @@ __kernel void computeGBSAForce1(
...
@@ -409,7 +409,7 @@ __kernel void computeGBSAForce1(
#else
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
#endif
__global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...
@@ -418,7 +418,7 @@ __kernel void computeGBSAForce1(
...
@@ -418,7 +418,7 @@ __kernel void computeGBSAForce1(
unsigned int numTiles,
unsigned int numTiles,
#endif
#endif
__global const ushort2* exclusionTiles) {
__global const ushort2* exclusionTiles) {
real
energy = 0
.0f
;
mixed
energy = 0;
__local AtomData2 localData[TILE_SIZE];
__local AtomData2 localData[TILE_SIZE];
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
fd473eea
...
@@ -22,10 +22,10 @@ __kernel void computeNonbonded(
...
@@ -22,10 +22,10 @@ __kernel void computeNonbonded(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
#
endif
...
@@ -34,11 +34,11 @@ __kernel void computeNonbonded(
...
@@ -34,11 +34,11 @@ __kernel void computeNonbonded(
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+warp*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+warp*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
warp+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
warp+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
@@ -87,11 +87,13 @@ __kernel void computeNonbonded(
...
@@ -87,11 +87,13 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
real tempEnergy = 0;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
force.xyz -= delta.xyz*dEdR;
force.xyz -= delta.xyz*dEdR;
#else
#else
force.xyz -= dEdR1.xyz;
force.xyz -= dEdR1.xyz;
#endif
#endif
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
excl >>= 1;
excl >>= 1;
#endif
#endif
...
@@ -100,7 +102,7 @@ __kernel void computeNonbonded(
...
@@ -100,7 +102,7 @@ __kernel void computeNonbonded(
}
}
else {
else {
// This is an off-diagonal tile.
// This is an off-diagonal tile.
const unsigned int localAtomIndex = get_local_id(0);
const unsigned int localAtomIndex = get_local_id(0);
unsigned int j = y*TILE_SIZE + tgx;
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
real4 tempPosq = posq[j];
...
@@ -126,7 +128,7 @@ __kernel void computeNonbonded(
...
@@ -126,7 +128,7 @@ __kernel void computeNonbonded(
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
PRUNE_BY_CUTOFF
#
ifdef
PRUNE_BY_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
real
r
=
r2*invR
;
...
@@ -144,6 +146,7 @@ __kernel void computeNonbonded(
...
@@ -144,6 +146,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
real tempEnergy = 0;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy += tempEnergy;
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
force.xyz -= delta.xyz;
...
@@ -156,6 +159,7 @@ __kernel void computeNonbonded(
...
@@ -156,6 +159,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
#ifdef PRUNE_BY_CUTOFF
}
}
#endif
#endif
...
@@ -169,6 +173,7 @@ __kernel void computeNonbonded(
...
@@ -169,6 +173,7 @@ __kernel void computeNonbonded(
// Write results.
// Write results.
#ifdef INCLUDE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
...
@@ -186,6 +191,7 @@ __kernel void computeNonbonded(
...
@@ -186,6 +191,7 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz += force.xyz;
forceBuffers[offset1].xyz += force.xyz;
if (x != y)
if (x != y)
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
#endif
#endif
#endif
}
}
...
@@ -213,7 +219,7 @@ __kernel void computeNonbonded(
...
@@ -213,7 +219,7 @@ __kernel void computeNonbonded(
bool includeTile = true;
bool includeTile = true;
// Extract the coordinates of this tile.
// Extract the coordinates of this tile.
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -245,7 +251,7 @@ __kernel void computeNonbonded(
...
@@ -245,7 +251,7 @@ __kernel void computeNonbonded(
}
}
else
else
skipTiles[get_local_id(0)] = end;
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
currentSkipIndex = tbx;
SYNC_WARPS;
SYNC_WARPS;
}
}
...
@@ -300,7 +306,7 @@ __kernel void computeNonbonded(
...
@@ -300,7 +306,7 @@ __kernel void computeNonbonded(
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
@@ -318,6 +324,7 @@ __kernel void computeNonbonded(
...
@@ -318,6 +324,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
real tempEnergy = 0;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy += tempEnergy;
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
force.xyz -= delta.xyz;
...
@@ -330,6 +337,7 @@ __kernel void computeNonbonded(
...
@@ -330,6 +337,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
#ifdef PRUNE_BY_CUTOFF
}
}
#endif
#endif
...
@@ -352,7 +360,7 @@ __kernel void computeNonbonded(
...
@@ -352,7 +360,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
@@ -370,6 +378,7 @@ __kernel void computeNonbonded(
...
@@ -370,6 +378,7 @@ __kernel void computeNonbonded(
real
tempEnergy
=
0
;
real
tempEnergy
=
0
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
#
ifdef
INCLUDE_FORCES
#
ifdef
USE_SYMMETRIC
#
ifdef
USE_SYMMETRIC
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
...
@@ -382,6 +391,7 @@ __kernel void computeNonbonded(
...
@@ -382,6 +391,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fz
+=
dEdR2.z
;
localData[tbx+tj].fz
+=
dEdR2.z
;
#
endif
#
endif
#
endif
#
ifdef
PRUNE_BY_CUTOFF
#
ifdef
PRUNE_BY_CUTOFF
}
}
#
endif
#
endif
...
@@ -392,6 +402,7 @@ __kernel void computeNonbonded(
...
@@ -392,6 +402,7 @@ __kernel void computeNonbonded(
//
Write
results.
//
Write
results.
#
ifdef
INCLUDE_FORCES
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
atom2
=
atomIndices[get_local_id
(
0
)
]
;
unsigned
int
atom2
=
atomIndices[get_local_id
(
0
)
]
;
#
else
#
else
...
@@ -412,9 +423,12 @@ __kernel void computeNonbonded(
...
@@ -412,9 +423,12 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset1].xyz
+=
force.xyz
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
if
(
atom2
<
PADDED_NUM_ATOMS
)
forceBuffers[offset2]
+=
(
real4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
forceBuffers[offset2]
+=
(
real4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
#
endif
#
endif
#
endif
}
}
pos++
;
pos++
;
}
}
#
ifdef
INCLUDE_ENERGY
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
#
endif
}
}
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
fd473eea
...
@@ -19,19 +19,19 @@ __kernel void computeNonbonded(
...
@@ -19,19 +19,19 @@ __kernel void computeNonbonded(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
#
endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
__local
AtomData
localData[TILE_SIZE]
;
__local
AtomData
localData[TILE_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+get_group_id
(
0
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+get_group_id
(
0
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
get_group_id
(
0
)
+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
get_group_id
(
0
)
+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
...
@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
real
r
=
r2*invR
;
...
@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
...
@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
@@ -228,18 +228,18 @@ __kernel void computeNonbonded(
...
@@ -228,18 +228,18 @@ __kernel void computeNonbonded(
while (pos < end) {
while (pos < end) {
const bool hasExclusions = false;
const bool hasExclusions = false;
bool includeTile = true;
bool includeTile = true;
// Extract the coordinates of this tile.
// Extract the coordinates of this tile.
int x, y;
int x, y;
bool singlePeriodicCopy = false;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
if (numTiles <= maxTiles) {
x = tiles[pos];
x = tiles[pos];
real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >=
MAX_
CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >=
MAX_
CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >=
MAX_
CUTOFF);
}
}
else
else
#endif
#endif
...
@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
...
@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
unsigned int atom2 = j;
unsigned int atom2 = j;
...
@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
...
@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
#endif
#endif
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
...
...
platforms/opencl/src/kernels/pme.cl
View file @
fd473eea
...
@@ -325,14 +325,14 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
...
@@ -325,14 +325,14 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
}
}
}
}
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global
real
* restrict energyBuffer,
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global
mixed
* restrict energyBuffer,
__global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ,
__global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real
energy = 0;
mixed
energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
// real indices
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
...
...
platforms/opencl/staticTarget/CMakeLists.txt
View file @
fd473eea
...
@@ -15,6 +15,6 @@ ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${AP
...
@@ -15,6 +15,6 @@ ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${AP
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
OPENCL_LIBRARIES
}
${
PTHREADS_LIB_STATIC
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
OPENCL_LIBRARIES
}
${
PTHREADS_LIB_STATIC
}
)
#-DPTW32_STATIC_LIB only works for the windows pthreads.
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB"
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
Prev
1
…
5
6
7
8
9
10
11
12
13
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment