Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
69cab567
"libraries/lepton/vscode:/vscode.git/clone" did not exist on "a85c24286b32fe4016f6dd74f89ad7c723e298d3"
Commit
69cab567
authored
May 19, 2016
by
peastman
Browse files
Began creating OpenCL implementation of GayBerneForce
parent
9f8910cd
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
832 additions
and
1 deletion
+832
-1
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+62
-0
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+3
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+365
-0
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+1
-0
platforms/opencl/src/kernels/gayBerne.cl
platforms/opencl/src/kernels/gayBerne.cl
+365
-0
platforms/opencl/tests/TestOpenCLGayBerneForce.cpp
platforms/opencl/tests/TestOpenCLGayBerneForce.cpp
+36
-0
No files found.
platforms/opencl/include/OpenCLKernels.h
View file @
69cab567
...
@@ -1074,6 +1074,68 @@ private:
...
@@ -1074,6 +1074,68 @@ private:
cl
::
Kernel
forceKernel
,
blockBoundsKernel
,
neighborsKernel
,
startIndicesKernel
,
copyPairsKernel
;
cl
::
Kernel
forceKernel
,
blockBoundsKernel
,
neighborsKernel
,
startIndicesKernel
,
copyPairsKernel
;
};
};
/**
* This kernel is invoked by GayBerneForce to calculate the forces acting on the system.
*/
class
OpenCLCalcGayBerneForceKernel
:
public
CalcGayBerneForceKernel
{
public:
OpenCLCalcGayBerneForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGayBerneForceKernel
(
name
,
platform
),
cl
(
cl
),
hasInitializedKernels
(
false
),
sortedParticles
(
NULL
),
axisParticleIndices
(
NULL
),
sigParams
(
NULL
),
epsParams
(
NULL
),
scale
(
NULL
),
aMatrix
(
NULL
),
bMatrix
(
NULL
),
gMatrix
(
NULL
),
exclusions
(
NULL
),
exclusionStartIndex
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedPos
(
NULL
),
torque
(
NULL
)
{
}
~
OpenCLCalcGayBerneForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the GayBerneForce this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
GayBerneForce
&
force
);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the GayBerneForce to copy the parameters from
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
GayBerneForce
&
force
);
private:
class
ReorderListener
;
void
sortAtoms
();
OpenCLContext
&
cl
;
bool
hasInitializedKernels
;
int
numRealParticles
;
GayBerneForce
::
NonbondedMethod
nonbondedMethod
;
OpenCLArray
*
sortedParticles
;
OpenCLArray
*
axisParticleIndices
;
OpenCLArray
*
sigParams
;
OpenCLArray
*
epsParams
;
OpenCLArray
*
scale
;
OpenCLArray
*
exceptionParticles
;
OpenCLArray
*
exceptionParams
;
OpenCLArray
*
aMatrix
;
OpenCLArray
*
bMatrix
;
OpenCLArray
*
gMatrix
;
OpenCLArray
*
exclusions
;
OpenCLArray
*
exclusionStartIndex
;
OpenCLArray
*
blockCenter
;
OpenCLArray
*
blockBoundingBox
;
OpenCLArray
*
sortedPos
;
OpenCLArray
*
torque
;
std
::
vector
<
bool
>
isRealParticle
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
excludedPairs
;
cl
::
Kernel
framesKernel
,
blockBoundsKernel
,
neighborsKernel
,
forceKernel
;
};
/**
/**
* This kernel is invoked by VerletIntegrator to take one time step.
* This kernel is invoked by VerletIntegrator to take one time step.
*/
*/
...
...
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
69cab567
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008 Stanford University and the Authors.
*
* Portions copyright (c) 2008
-2016
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -108,6 +108,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
...
@@ -108,6 +108,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return
new
OpenCLCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
return
new
OpenCLCalcCustomManyParticleForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcCustomManyParticleForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcGayBerneForceKernel
::
Name
())
return
new
OpenCLCalcGayBerneForceKernel
(
name
,
platform
,
cl
);
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
return
new
OpenCLIntegrateVerletStepKernel
(
name
,
platform
,
cl
);
return
new
OpenCLIntegrateVerletStepKernel
(
name
,
platform
,
cl
);
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
69cab567
...
@@ -5946,6 +5946,371 @@ void OpenCLCalcCustomManyParticleForceKernel::copyParametersToContext(ContextImp
...
@@ -5946,6 +5946,371 @@ void OpenCLCalcCustomManyParticleForceKernel::copyParametersToContext(ContextImp
cl.invalidateMolecules();
cl.invalidateMolecules();
}
}
class OpenCLGayBerneForceInfo : public OpenCLForceInfo {
public:
OpenCLGayBerneForceInfo(int requiredBuffers, const GayBerneForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
int xparticle1, yparticle1;
double sigma1, epsilon1, sx1, sy1, sz1, ex1, ey1, ez1;
int xparticle2, yparticle2;
double sigma2, epsilon2, sx2, sy2, sz2, ex2, ey2, ez2;
force.getParticleParameters(particle1, sigma1, epsilon1, xparticle1, yparticle1, sx1, sy1, sz1, ex1, ey1, ez1);
force.getParticleParameters(particle2, sigma2, epsilon2, xparticle2, yparticle2, sx2, sy2, sz2, ex2, ey2, ez2);
return (sigma1 == sigma2 && epsilon1 == epsilon2 && sx1 == sx2 && sy1 == sy2 && sz1 == sz2 && ex1 == ex2 && ey1 == ey2 && ez1 == ez2);
}
int getNumParticleGroups() {
return force.getNumExceptions()+force.getNumParticles();
}
void getParticlesInGroup(int index, vector<int>& particles) {
if (index < force.getNumExceptions()) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
else {
int particle = index-force.getNumExceptions();
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
force.getParticleParameters(particle, sigma, epsilon, xparticle, yparticle, sx, sy, sz, ex, ey, ez);
particles.clear();
particles.push_back(particle);
if (xparticle > -1)
particles.push_back(xparticle);
if (yparticle > -1)
particles.push_back(yparticle);
}
}
bool areGroupsIdentical(int group1, int group2) {
if (group1 < force.getNumExceptions() && group2 < force.getNumExceptions()) {
int particle1, particle2;
double sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, sigma2, epsilon2);
return (sigma1 == sigma2 && epsilon1 == epsilon2);
}
return true;
}
private:
const GayBerneForce& force;
};
class OpenCLCalcGayBerneForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
ReorderListener(OpenCLCalcGayBerneForceKernel& owner) : owner(owner) {
}
void execute() {
owner.sortAtoms();
}
private:
OpenCLCalcGayBerneForceKernel& owner;
};
OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
if (sortedParticles != NULL)
delete sortedParticles;
if (axisParticleIndices != NULL)
delete axisParticleIndices;
if (sigParams != NULL)
delete sigParams;
if (epsParams != NULL)
delete epsParams;
if (scale != NULL)
delete scale;
if (exceptionParticles != NULL)
delete exceptionParticles;
if (exceptionParams != NULL)
delete exceptionParams;
if (aMatrix != NULL)
delete aMatrix;
if (bMatrix != NULL)
delete bMatrix;
if (gMatrix != NULL)
delete gMatrix;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (sortedPos != NULL)
delete sortedPos;
if (torque != NULL)
delete torque;
}
void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBerneForce& force) {
// Initialize interactions.
int numParticles = force.getNumParticles();
sigParams = OpenCLArray::create<mm_float4>(cl, cl.getPaddedNumAtoms(), "sigParams");
epsParams = OpenCLArray::create<mm_float2>(cl, cl.getPaddedNumAtoms(), "epsParams");
scale = OpenCLArray::create<mm_float4>(cl, cl.getPaddedNumAtoms(), "scale");
axisParticleIndices = OpenCLArray::create<mm_int2>(cl, cl.getPaddedNumAtoms(), "axisParticleIndices");
sortedParticles = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "sortedParticles");
aMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "aMatrix");
bMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "bMatrix");
gMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "gMatrix");
vector<mm_float4> sigParamsVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float2> epsParamsVector(cl.getPaddedNumAtoms(), mm_float2(0, 0));
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_int2> axisParticleVector(cl.getPaddedNumAtoms(), mm_int2(0, 0));
isRealParticle.resize(cl.getPaddedNumAtoms());
vector<vector<int> > exclusionList(numParticles);
numRealParticles = 0;
for (int i = 0; i < numParticles; i++) {
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
force.getParticleParameters(i, sigma, epsilon, xparticle, yparticle, sx, sy, sz, ex, ey, ez);
axisParticleVector[i] = mm_int2(xparticle, yparticle);
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
if (epsilon != 0.0) {
numRealParticles++;
isRealParticle[i] = true;
}
else
isRealParticle[i] = false;
exclusionList[i].push_back(i);
}
sigParams->upload(sigParamsVector);
epsParams->upload(epsParamsVector);
scale->upload(scaleVector);
axisParticleIndices->upload(axisParticleVector);
// Create data structures used for the neighbor list.
int numAtomBlocks = (numRealParticles+31)/32;
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
// Create arrays for accumulating torques.
if (cl.getSupports64BitGlobalAtomics())
torque = OpenCLArray::create<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
else
torque = new OpenCLArray(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "torque");
cl.addAutoclearBuffer(*torque);
// Create the kernels.
nonbondedMethod = force.getNonbondedMethod();
bool useCutoff = (nonbondedMethod != GayBerneForce::NoCutoff);
bool usePeriodic = (nonbondedMethod == GayBerneForce::CutoffPeriodic);
map<string, string> defines;
defines["USE_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
if (useCutoff) {
// Compute the switching coefficients.
if (force.getUseSwitchingFunction()) {
defines["SWITCH_CUTOFF"] = cl.doubleToString(force.getSwitchingDistance());
defines["SWITCH_C3"] = cl.doubleToString(10/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 3.0));
defines["SWITCH_C4"] = cl.doubleToString(15/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 4.0));
defines["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
if (!cl.getSupports64BitGlobalAtomics()) {
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
}
cl::Program program = cl.createProgram(OpenCLKernelSources::gayBerne, defines);
framesKernel = cl::Kernel(program, "computeEllipsoidFrames");
blockBoundsKernel = cl::Kernel(program, "findBlockBounds");
neighborsKernel = cl::Kernel(program, "findNeighbors");
forceKernel = cl::Kernel(program, "computeForce");
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
// cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigParams", "float", 4, sizeof(cl_float4), sigParams->getDeviceBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("epsParams", "float", 2, sizeof(cl_float2), epsParams->getDeviceBuffer()));
// Record exceptions and exclusions.
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
if (epsilon != 0.0)
exceptions.push_back(i);
}
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
// Initialize the exceptions.
int numExceptions = exceptions.size();
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = OpenCLArray::create<mm_float2>(cl, numExceptions, "exceptionParams");
vector<mm_float2> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double sigma, epsilon;
force.getExceptionParameters(exceptions[i], atoms[i][0], atoms[i][1], sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon));
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams->upload(exceptionParamsVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams->getDeviceBuffer(), "float2");
// cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
cl.addForce(new OpenCLGayBerneForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
cl.addReorderListener(new ReorderListener(*this));
}
double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (!hasInitializedKernels) {
hasInitializedKernels = true;
sortAtoms();
framesKernel.setArg<cl_int>(0, numRealParticles);
framesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(2, axisParticleIndices->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(3, sigParams->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(4, scale->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(5, aMatrix->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(6, bMatrix->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(7, gMatrix->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(8, sortedParticles->getDeviceBuffer());
blockBoundsKernel.setArg<cl_int>(0, numRealParticles);
blockBoundsKernel.setArg<cl::Buffer>(6, sortedParticles->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(7, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, epsParams->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedParticles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, aMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, bMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
}
cl.executeKernel(framesKernel, cl.getNumAtoms());
setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
cl.executeKernel(blockBoundsKernel, (numRealParticles+31)/32);
if (nonbondedMethod != GayBerneForce::NoCutoff) {
}
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
return 0.0;
}
void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context, const GayBerneForce& force) {
// Make sure the new parameters are acceptable.
if (force.getNumParticles() != cl.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()])
exceptions.push_back(i);
else if (epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
// Record the per-particle parameters.
vector<mm_float4> sigParamsVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float2> epsParamsVector(cl.getPaddedNumAtoms(), mm_float2(0, 0));
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
numRealParticles = 0;
for (int i = 0; i < force.getNumParticles(); i++) {
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
force.getParticleParameters(i, sigma, epsilon, xparticle, yparticle, sx, sy, sz, ex, ey, ez);
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.5*sx), (float) (0.5*sy), (float) (0.5*sz));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
if (epsilon != 0.0) {
numRealParticles++;
isRealParticle[i] = true;
}
else
isRealParticle[i] = false;
}
sigParams->upload(sigParamsVector);
epsParams->upload(epsParamsVector);
scale->upload(scaleVector);
// Record the exceptions.
if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<mm_float2> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon));
}
exceptionParams->upload(exceptionParamsVector);
}
cl.invalidateMolecules();
sortAtoms();
}
void OpenCLCalcGayBerneForceKernel::sortAtoms() {
// Sort the list of atoms by type to avoid thread divergence. This is executed every time
// the atoms are reordered.
int nextIndex = 0;
vector<cl_int> particles(cl.getPaddedNumAtoms(), 0);
const vector<int>& order = cl.getAtomIndex();
for (int i = 0; i < cl.getNumAtoms(); i++) {
int atom = order[i];
if (isRealParticle[atom])
particles[nextIndex++] = atom;
}
sortedParticles->upload(particles);
// Rebuild the list of exclusions.
vector<vector<int> > excludedAtoms(numRealParticles);
for (int i = 0; i < excludedPairs.size(); i++) {
int first = min(excludedPairs[i].first, excludedPairs[i].second);
int second = max(excludedPairs[i].first, excludedPairs[i].second);
excludedAtoms[first].push_back(second);
}
int index = 0;
vector<int> exclusionVec(exclusions->getSize());
vector<int> startIndexVec(exclusionStartIndex->getSize());
for (int i = 0; i < numRealParticles; i++) {
startIndexVec[i] = index;
for (int j = 0; j < excludedAtoms[i].size(); j++)
exclusionVec[index++] = excludedAtoms[i][j];
}
startIndexVec[numRealParticles] = index;
exclusions->upload(exclusionVec);
exclusionStartIndex->upload(startIndexVec);
}
OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
}
}
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
69cab567
...
@@ -83,6 +83,7 @@ OpenCLPlatform::OpenCLPlatform() {
...
@@ -83,6 +83,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGayBerneForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateBrownianStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateBrownianStepKernel
::
Name
(),
factory
);
...
...
platforms/opencl/src/kernels/gayBerne.cl
0 → 100644
View file @
69cab567
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
define
TILE_SIZE
32
/**
*
Calculate
the
ellipsoid
coordinate
frames
and
associated
matrices.
*/
__kernel
void
computeEllipsoidFrames
(
int
numParticles,
__global
const
real4*
restrict
posq,
__global
int2*
const
restrict
axisParticleIndices,
__global
const
float4*
restrict
sigParams,
__global
const
float4*
restrict
scale,
__global
real*
restrict
aMatrix,
__global
real*
restrict
bMatrix,
__global
real*
restrict
gMatrix,
__global
const
int*
sortedParticles
)
{
for
(
int
sortedIndex
=
get_global_id
(
0
)
; sortedIndex < numParticles; sortedIndex += get_global_size(0)) {
//
Compute
the
local
coordinate
system
of
the
ellipsoid
;
int
originalIndex
=
sortedParticles[sortedIndex]
;
real3
pos
=
posq[originalIndex].xyz
;
int2
axisParticles
=
axisParticleIndices[originalIndex]
;
real3
xdir,
ydir,
zdir
;
if
(
axisParticles.x
==
-1
)
{
xdir
=
(
real3
)
(
1
,
0
,
0
)
;
ydir
=
(
real3
)
(
0
,
1
,
0
)
;
}
else
{
xdir
=
pos-posq[axisParticles.x].xyz
;
xdir
=
normalize
(
xdir
)
;
if
(
axisParticles.y
==
-1
)
{
if
(
xdir.y
>
-0.5f
&&
xdir.y
<
0.5f
)
ydir
=
(
real3
)
(
0
,
1
,
0
)
;
else
ydir
=
(
real3
)
(
1
,
0
,
0
)
;
}
else
ydir
=
pos-posq[axisParticles.y].xyz
;
ydir
-=
xdir*dot
(
xdir,
ydir
)
;
ydir
=
normalize
(
ydir
)
;
}
zdir
=
cross
(
xdir,
ydir
)
;
//
Compute
matrices
we
will
need
later.
__global
real
(
*a
)
[3]
=
(
__global
real
(
*
)
[3]
)
(
aMatrix+sortedIndex*9
)
;
__global
real
(
*b
)
[3]
=
(
__global
real
(
*
)
[3]
)
(
bMatrix+sortedIndex*9
)
;
__global
real
(
*g
)
[3]
=
(
__global
real
(
*
)
[3]
)
(
gMatrix+sortedIndex*9
)
;
a[0][0]
=
xdir.x
;
a[0][1]
=
xdir.y
;
a[0][2]
=
xdir.z
;
a[1][0]
=
ydir.x
;
a[1][1]
=
ydir.y
;
a[1][2]
=
ydir.z
;
a[2][0]
=
zdir.x
;
a[2][1]
=
zdir.y
;
a[2][2]
=
zdir.z
;
float4
sig
=
sigParams[originalIndex]
;
float3
r2
=
sig.yzw
;
float3
e2
=
scale[originalIndex].xyz
;
for
(
int
i
=
0
; i < 3; i++)
for
(
int
j
=
0
; j < 3; j++) {
real
belem
=
0
;
real
gelem
=
0
;
for
(
int
k
=
0
; k < 3; k++) {
belem
+=
a[k][i]*e2[k]*a[k][j]
;
gelem
+=
a[k][i]*r2[k]*a[k][j]
;
}
b[i][j]
=
belem
;
g[i][j]
=
gelem
;
}
}
}
/**
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*/
__kernel
void
findBlockBounds
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
const
int*
sortedAtoms,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
sortedPos,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox
)
{
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
numAtoms
)
{
real4
pos
=
posq[sortedAtoms[base]]
;
sortedPos[base]
=
pos
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_POS
(
pos
)
#
endif
real4
minPos
=
pos
;
real4
maxPos
=
pos
;
int
last
=
min
(
base+TILE_SIZE,
numAtoms
)
;
for
(
int
i
=
base+1
; i < last; i++) {
pos
=
posq[sortedAtoms[i]]
;
sortedPos[i]
=
pos
;
#
ifdef
USE_PERIODIC
real4
center
=
0.5f*
(
maxPos+minPos
)
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos,
center
)
#
endif
minPos
=
min
(
minPos,
pos
)
;
maxPos
=
max
(
maxPos,
pos
)
;
}
real4
blockSize
=
0.5f*
(
maxPos-minPos
)
;
blockBoundingBox[index]
=
blockSize
;
blockCenter[index]
=
0.5f*
(
maxPos+minPos
)
;
index
+=
get_global_size
(
0
)
;
base
=
index*TILE_SIZE
;
}
}
/**
*
Build
a
list
of
neighbors
for
each
atom.
*/
__kernel
void
findNeighbors
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
const
int*
sortedAtoms,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
sortedPos,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox
)
{
}
typedef
struct
{
float4
sig
;
float2
eps
;
real3
pos
;
real
a[3][3],
b[3][3],
g[3][3]
;
}
AtomData
;
void
loadAtomData
(
AtomData*
data,
int
sortedIndex,
int
originalIndex,
__global
const
real4*
restrict
pos,
__global
const
float4*
restrict
sigParams,
__global
const
float2*
restrict
epsParams,
__global
const
real*
restrict
aMatrix,
__global
const
real*
restrict
bMatrix,
__global
const
real*
restrict
gMatrix
)
{
data->sig
=
sigParams[originalIndex]
;
data->eps
=
epsParams[originalIndex]
;
data->pos
=
pos[sortedIndex].xyz
;
for
(
int
i
=
0
; i < 3; i++)
for
(
int
j
=
0
; j < 3; j++) {
int
k
=
9*sortedIndex+3*i+j
;
data->a[i][j]
=
aMatrix[k]
;
data->b[i][j]
=
bMatrix[k]
;
data->g[i][j]
=
gMatrix[k]
;
}
}
real3
matrixVectorProduct
(
real
(
*m
)
[3],
real3
v
)
{
return
(
real3
)
(
m[0][0]*v[0]
+
m[0][1]*v[1]
+
m[0][2]*v[2],
m[1][0]*v[0]
+
m[1][1]*v[1]
+
m[1][2]*v[2],
m[2][0]*v[0]
+
m[2][1]*v[1]
+
m[2][2]*v[2]
)
;
}
real3
vectorMatrixProduct
(
real3
v,
real
(
*m
)
[3]
)
{
return
(
real3
)
(
m[0][0]*v[0]
+
m[1][0]*v[1]
+
m[2][0]*v[2],
m[0][1]*v[0]
+
m[1][1]*v[1]
+
m[2][1]*v[2],
m[0][2]*v[0]
+
m[1][2]*v[1]
+
m[2][2]*v[2]
)
;
}
void
matrixSum
(
real
(
*result
)
[3],
real
(
*a
)
[3],
real
(
*b
)
[3]
)
{
result[0][0]
=
a[0][0]+b[0][0]
;
result[0][1]
=
a[0][1]+b[0][1]
;
result[0][2]
=
a[0][2]+b[0][2]
;
result[1][0]
=
a[1][0]+b[1][0]
;
result[1][1]
=
a[1][1]+b[1][1]
;
result[1][2]
=
a[1][2]+b[1][2]
;
result[2][0]
=
a[2][0]+b[2][0]
;
result[2][1]
=
a[2][1]+b[2][1]
;
result[2][2]
=
a[2][2]+b[2][2]
;
}
real
determinant
(
real
(
*m
)
[3]
)
{
return
(
m[0][0]*m[1][1]*m[2][2]
+
m[0][1]*m[1][2]*m[2][0]
+
m[0][2]*m[1][0]*m[2][1]
-
m[0][0]*m[1][2]*m[2][1]
-
m[0][1]*m[1][0]*m[2][2]
-
m[0][2]*m[1][1]*m[2][0]
)
;
}
void
matrixInverse
(
real
(
*result
)
[3],
real
(
*m
)
[3]
)
{
real
invDet
=
RECIP
(
determinant
(
m
))
;
result[0][0]
=
invDet*
(
m[1][1]*m[2][2]
-
m[1][2]*m[2][1]
)
;
result[1][0]
=
-invDet*
(
m[1][0]*m[2][2]
-
m[1][2]*m[2][0]
)
;
result[2][0]
=
invDet*
(
m[1][0]*m[2][1]
-
m[1][1]*m[2][0]
)
;
result[0][1]
=
-invDet*
(
m[0][1]*m[2][2]
-
m[0][2]*m[2][1]
)
;
result[1][1]
=
invDet*
(
m[0][0]*m[2][2]
-
m[0][2]*m[2][0]
)
;
result[2][1]
=
-invDet*
(
m[0][0]*m[2][1]
-
m[0][1]*m[2][0]
)
;
result[0][2]
=
invDet*
(
m[0][1]*m[1][2]
-
m[0][2]*m[1][1]
)
;
result[1][2]
=
-invDet*
(
m[0][0]*m[1][2]
-
m[0][2]*m[1][0]
)
;
result[2][2]
=
invDet*
(
m[0][0]*m[1][1]
-
m[0][1]*m[1][0]
)
;
}
void
computeOneInteraction
(
AtomData*
data1,
AtomData*
data2,
real
sigma,
real
epsilon,
real3
dr,
real
r2,
real3*
force1,
real3*
force2,
real3*
torque1,
real3*
torque2,
real
*totalEnergy
)
{
real
rInv
=
RSQRT
(
r2
)
;
real
r
=
r2*rInv
;
real3
drUnit
=
dr*rInv
;
//
Compute
the
switching
function.
real
switchValue
=
1
,
switchDeriv
=
0
;
#
if
USE_LJ_SWITCH
if
(
r
>
LJ_SWITCH_CUTOFF
)
{
real
x
=
r-LJ_SWITCH_CUTOFF
;
switchValue
=
1+x*x*x*
(
LJ_SWITCH_C3+x*
(
LJ_SWITCH_C4+x*LJ_SWITCH_C5
))
;
switchDeriv
=
x*x*
(
3*LJ_SWITCH_C3+x*
(
4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5
))
;
}
#
endif
//
Compute
vectors
and
matrices
we
'll
be
needing.
real
B12[3][3],
G12[3][3],
B12inv[3][3],
G12inv[3][3]
;
matrixSum
(
B12,
data1->b,
data2->b
)
;
matrixSum
(
G12,
data1->g,
data2->g
)
;
matrixInverse
(
B12inv,
B12
)
;
matrixInverse
(
G12inv,
G12
)
;
real
detG12
=
determinant
(
G12
)
;
//
Estimate
the
distance
between
the
ellipsoids
and
compute
the
first
terms
needed
for
the
energy.
real
sigma12
=
1/SQRT
(
0.5f*dot
(
drUnit,
matrixVectorProduct
(
G12inv,
drUnit
)))
;
real
h12
=
r
-
sigma12
;
real
rho
=
sigma/
(
h12+sigma
)
;
real
rho2
=
rho*rho
;
real
rho6
=
rho2*rho2*rho2
;
real
u
=
4*epsilon*
(
rho6*rho6-rho6
)
;
real
eta
=
SQRT
(
2*data1->eps.y*data2->eps.y/detG12
)
;
real
chi
=
2*dot
(
drUnit,
matrixVectorProduct
(
B12inv,
drUnit
))
;
chi
*=
chi
;
real
energy
=
u*eta*chi
;
//
Compute
the
terms
needed
for
the
force.
real3
kappa
=
matrixVectorProduct
(
G12inv,
dr
)
;
real3
iota
=
matrixVectorProduct
(
B12inv,
dr
)
;
real
rInv2
=
rInv*rInv
;
real
dUSLJdr
=
24*epsilon*
(
2*rho6-1
)
*rho6*
rho/sigma
;
real
temp
=
0.5f*sigma12*sigma12*sigma12*rInv2
;
real3
dudr
=
(
drUnit
+
(
kappa-drUnit*dot
(
kappa,
drUnit
))
*temp
)
*dUSLJdr
;
real3
dchidr
=
(
iota-drUnit*dot
(
iota,
drUnit
))
*
(
-8*rInv2*SQRT
(
chi
))
;
real3
force
=
(
dchidr*u
+
dudr*chi
)
*
(
eta*switchValue
)
-
drUnit*
(
energy*switchDeriv
)
;
*force1
+=
force
;
*force2
-=
force
;
//
Compute
the
terms
needed
for
the
torque.
for
(
int
j
=
0
; j < 2; j++) {
real
(
*a
)
[3]
=
(
j
==
0
?
data1->a
:
data2->a
)
;
real
(
*b
)
[3]
=
(
j
==
0
?
data1->b
:
data2->b
)
;
real
(
*g
)
[3]
=
(
j
==
0
?
data1->g
:
data2->g
)
;
float4
sig
=
(
j
==
0
?
data1->sig
:
data2->sig
)
;
real3
dudq
=
cross
(
vectorMatrixProduct
(
kappa,
g
)
,
kappa*
(
temp*dUSLJdr
))
;
real3
dchidq
=
cross
(
vectorMatrixProduct
(
iota,
b
)
,
iota
)
*
(
-4*rInv2
)
;
real3
scale
=
(
real3
)
(
sig.y,
sig.z,
sig.w
)
*
(
-0.5f*eta/detG12
)
;
real
d[3][3]
;
d[0][0]
=
scale[0]*
(
2*a[0][0]*
(
G12[1][1]*G12[2][2]
-
G12[1][2]*G12[2][1]
)
+
a[0][2]*
(
G12[1][2]*G12[0][1]
+
G12[1][0]*G12[2][1]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
))
+
a[0][1]*
(
G12[0][2]*G12[2][1]
+
G12[2][0]*G12[1][2]
-
G12[2][2]*
(
G12[0][1]
+
G12[1][0]
)))
;
d[0][1]
=
scale[0]*
(
a[0][0]*
(
G12[0][2]*G12[2][1]
+
G12[2][0]*G12[1][2]
-
G12[2][2]*
(
G12[0][1]
+
G12[1][0]
))
+
2*a[0][1]*
(
G12[0][0]*G12[2][2]
-
G12[2][0]*G12[0][2]
)
+
a[0][2]*
(
G12[1][0]*G12[0][2]
+
G12[2][0]*G12[0][1]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
)))
;
d[0][2]
=
scale[0]*
(
a[0][0]*
(
G12[0][1]*G12[1][2]
+
G12[1][0]*G12[2][1]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
))
+
a[0][1]*
(
G12[1][0]*G12[0][2]
+
G12[2][0]*G12[0][1]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
))
+
2*a[0][2]*
(
G12[1][1]*G12[0][0]
-
G12[1][0]*G12[0][1]
))
;
d[1][0]
=
scale[1]*
(
2*a[1][0]*
(
G12[1][1]*G12[2][2]
-
G12[1][2]*G12[2][1]
)
+
a[1][1]*
(
G12[0][2]*G12[2][1]
+
G12[2][0]*G12[1][2]
-
G12[2][2]*
(
G12[0][1]
+
G12[1][0]
))
+
a[1][2]*
(
G12[1][2]*G12[0][1]
+
G12[1][0]*G12[2][1]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
)))
;
d[1][1]
=
scale[1]*
(
a[1][0]*
(
G12[0][2]*G12[2][1]
+
G12[2][0]*G12[1][2]
-
G12[2][2]*
(
G12[0][1]
+
G12[1][0]
))
+
2*a[1][1]*
(
G12[2][2]*G12[0][0]
-
G12[2][0]*G12[0][2]
)
+
a[1][2]*
(
G12[1][0]*G12[0][2]
+
G12[0][1]*G12[2][0]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
)))
;
d[1][2]
=
scale[1]*
(
a[1][0]*
(
G12[0][1]*G12[1][2]
+
G12[1][0]*G12[2][1]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
))
+
a[1][1]*
(
G12[1][0]*G12[0][2]
+
G12[0][1]*G12[2][0]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
))
+
2*a[1][2]*
(
G12[1][1]*G12[0][0]
-
G12[1][0]*G12[0][1]
))
;
d[2][0]
=
scale[2]*
(
2*a[2][0]*
(
G12[1][1]*G12[2][2]
-
G12[2][1]*G12[1][2]
)
+
a[2][1]*
(
G12[0][2]*G12[2][1]
+
G12[1][2]*G12[2][0]
-
G12[2][2]*
(
G12[0][1]
+
G12[1][0]
))
+
a[2][2]*
(
G12[0][1]*G12[1][2]
+
G12[2][1]*G12[1][0]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
)))
;
d[2][1]
=
scale[2]*
(
a[2][0]*
(
G12[0][2]*G12[2][1]
+
G12[1][2]*G12[2][0]
-
G12[2][2]*
(
G12[0][1]
+
G12[1][0]
))
+
2*a[2][1]*
(
G12[0][0]*G12[2][2]
-
G12[0][2]*G12[2][0]
)
+
a[2][2]*
(
G12[1][0]*G12[0][2]
+
G12[0][1]*G12[2][0]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
)))
;
d[2][2]
=
scale[2]*
(
a[2][0]*
(
G12[0][1]*G12[1][2]
+
G12[2][1]*G12[1][0]
-
G12[1][1]*
(
G12[0][2]
+
G12[2][0]
))
+
a[2][1]*
(
G12[1][0]*G12[0][2]
+
G12[2][0]*G12[0][1]
-
G12[0][0]*
(
G12[1][2]
+
G12[2][1]
))
+
2*a[2][2]*
(
G12[1][1]*G12[0][0]
-
G12[1][0]*G12[0][1]
))
;
real3
detadq
;
for
(
int
i
=
0
; i < 3; i++)
detadq
+=
cross
((
real3
)
(
a[i][0],
a[i][1],
a[i][2]
)
,
(
real3
)
(
d[i][0],
d[i][1],
d[i][2]
))
;
real3
torque
=
(
dchidq*
(
u*eta
)
+
detadq*
(
u*chi
)
+
dudq*
(
eta*chi
))
*switchValue
;
*
(
j
==
0
?
torque1
:
torque2
)
-=
torque
;
}
*totalEnergy
+=
switchValue*energy
;
}
/**
*
Compute
the
interactions.
*/
__kernel
void
computeForce
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
restrict
forceBuffers,
__global
long*
restrict
torqueBuffers,
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
torqueBuffers,
#
endif
int
numAtoms,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
pos,
__global
const
float4*
restrict
sigParams,
__global
const
float2*
restrict
epsParams,
__global
const
int*
restrict
sortedAtoms,
__global
const
real*
restrict
aMatrix,
__global
const
real*
restrict
bMatrix,
__global
const
real*
restrict
gMatrix,
__global
const
int*
restrict
exclusions,
__global
const
int*
restrict
exclusionStartIndex
#
ifdef
USE_CUTOFF
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
#
endif
)
{
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
mixed
energy
=
0
;
for
(
int
atom1
=
get_global_id
(
0
)
; atom1 < numAtoms; atom1 += get_global_size(0)) {
//
Load
parameters
for
atom1.
int
index1
=
sortedAtoms[atom1]
;
AtomData
data1
;
loadAtomData
(
&data1,
atom1,
index1,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force1
=
0.0f
;
real3
torque1
=
0.0f
;
int
nextExclusion
=
exclusionStartIndex[atom1]
;
int
lastExclusion
=
exclusionStartIndex[atom1+1]
;
for
(
int
atom2
=
atom1+1
; atom2 < numAtoms; atom2++) {
//
Skip
over
excluded
interactions.
if
(
nextExclusion
<
lastExclusion
&&
exclusions[nextExclusion]
==
atom2
)
{
nextExclusion++
;
continue
;
}
//
Load
parameters
for
atom2.
int
index2
=
sortedAtoms[atom2]
;
AtomData
data2
;
loadAtomData
(
&data2,
atom2,
index2,
pos,
sigParams,
epsParams,
aMatrix,
bMatrix,
gMatrix
)
;
real3
force2
=
0.0f
;
real3
torque2
=
0.0f
;
//
Compute
the
interaction.
real3
delta
=
data1.pos-data2.pos
;
#
ifdef
USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
real
sigma
=
data1.sig.x+data2.sig.x
;
real
epsilon
=
data1.eps.x*data2.eps.x
;
computeOneInteraction
(
&data1,
&data2,
sigma,
epsilon,
delta,
r2,
&force1,
&force2,
&torque1,
&torque2,
&energy
)
;
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index2],
(
long
)
(
force2.x*0x100000000
))
;
atom_add
(
&forceBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
force2.y*0x100000000
))
;
atom_add
(
&forceBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
force2.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index2],
(
long
)
(
torque2.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+PADDED_NUM_ATOMS],
(
long
)
(
torque2.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index2+2*PADDED_NUM_ATOMS],
(
long
)
(
torque2.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index2
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force2.xyz
;
torqueBuffers[offset].xyz
+=
torque2.xyz
;
#
endif
#
ifdef
USE_CUTOFF
}
#
endif
}
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[index1],
(
long
)
(
force1.x*0x100000000
))
;
atom_add
(
&forceBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
force1.y*0x100000000
))
;
atom_add
(
&forceBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
force1.z*0x100000000
))
;
atom_add
(
&torqueBuffers[index1],
(
long
)
(
torque1.x*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+PADDED_NUM_ATOMS],
(
long
)
(
torque1.y*0x100000000
))
;
atom_add
(
&torqueBuffers[index1+2*PADDED_NUM_ATOMS],
(
long
)
(
torque1.z*0x100000000
))
;
#
else
unsigned
int
offset
=
index1
+
warp*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
+=
force1.xyz
;
torqueBuffers[offset].xyz
+=
torque1.xyz
;
#
endif
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
platforms/opencl/tests/TestOpenCLGayBerneForce.cpp
0 → 100644
View file @
69cab567
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestGayBerneForce.h"
void
runPlatformTests
()
{
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment