Commit 69cab567 authored by peastman's avatar peastman
Browse files

Began creating OpenCL implementation of GayBerneForce

parent 9f8910cd
......@@ -1074,6 +1074,68 @@ private:
cl::Kernel forceKernel, blockBoundsKernel, neighborsKernel, startIndicesKernel, copyPairsKernel;
};
/**
* This kernel is invoked by GayBerneForce to calculate the forces acting on the system.
*/
class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public:
OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl),
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), aMatrix(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedPos(NULL),
torque(NULL) {
}
~OpenCLCalcGayBerneForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the GayBerneForce this kernel will be used for
*/
void initialize(const System& system, const GayBerneForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the GayBerneForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);
private:
class ReorderListener;
void sortAtoms();
OpenCLContext& cl;
bool hasInitializedKernels;
int numRealParticles;
GayBerneForce::NonbondedMethod nonbondedMethod;
OpenCLArray* sortedParticles;
OpenCLArray* axisParticleIndices;
OpenCLArray* sigParams;
OpenCLArray* epsParams;
OpenCLArray* scale;
OpenCLArray* exceptionParticles;
OpenCLArray* exceptionParams;
OpenCLArray* aMatrix;
OpenCLArray* bMatrix;
OpenCLArray* gMatrix;
OpenCLArray* exclusions;
OpenCLArray* exclusionStartIndex;
OpenCLArray* blockCenter;
OpenCLArray* blockBoundingBox;
OpenCLArray* sortedPos;
OpenCLArray* torque;
std::vector<bool> isRealParticle;
std::vector<std::pair<int, int> > exceptionAtoms;
std::vector<std::pair<int, int> > excludedPairs;
cl::Kernel framesKernel, blockBoundsKernel, neighborsKernel, forceKernel;
};
/**
* This kernel is invoked by VerletIntegrator to take one time step.
*/
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -108,6 +108,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return new OpenCLCalcCustomCompoundBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomManyParticleForceKernel::Name())
return new OpenCLCalcCustomManyParticleForceKernel(name, platform, cl, context.getSystem());
if (name == CalcGayBerneForceKernel::Name())
return new OpenCLCalcGayBerneForceKernel(name, platform, cl);
if (name == IntegrateVerletStepKernel::Name())
return new OpenCLIntegrateVerletStepKernel(name, platform, cl);
if (name == IntegrateLangevinStepKernel::Name())
......
......@@ -5946,6 +5946,371 @@ void OpenCLCalcCustomManyParticleForceKernel::copyParametersToContext(ContextImp
cl.invalidateMolecules();
}
class OpenCLGayBerneForceInfo : public OpenCLForceInfo {
public:
OpenCLGayBerneForceInfo(int requiredBuffers, const GayBerneForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
int xparticle1, yparticle1;
double sigma1, epsilon1, sx1, sy1, sz1, ex1, ey1, ez1;
int xparticle2, yparticle2;
double sigma2, epsilon2, sx2, sy2, sz2, ex2, ey2, ez2;
force.getParticleParameters(particle1, sigma1, epsilon1, xparticle1, yparticle1, sx1, sy1, sz1, ex1, ey1, ez1);
force.getParticleParameters(particle2, sigma2, epsilon2, xparticle2, yparticle2, sx2, sy2, sz2, ex2, ey2, ez2);
return (sigma1 == sigma2 && epsilon1 == epsilon2 && sx1 == sx2 && sy1 == sy2 && sz1 == sz2 && ex1 == ex2 && ey1 == ey2 && ez1 == ez2);
}
int getNumParticleGroups() {
return force.getNumExceptions()+force.getNumParticles();
}
void getParticlesInGroup(int index, vector<int>& particles) {
if (index < force.getNumExceptions()) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(index, particle1, particle2, sigma, epsilon);
particles.resize(2);
particles[0] = particle1;
particles[1] = particle2;
}
else {
int particle = index-force.getNumExceptions();
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
force.getParticleParameters(particle, sigma, epsilon, xparticle, yparticle, sx, sy, sz, ex, ey, ez);
particles.clear();
particles.push_back(particle);
if (xparticle > -1)
particles.push_back(xparticle);
if (yparticle > -1)
particles.push_back(yparticle);
}
}
bool areGroupsIdentical(int group1, int group2) {
if (group1 < force.getNumExceptions() && group2 < force.getNumExceptions()) {
int particle1, particle2;
double sigma1, sigma2, epsilon1, epsilon2;
force.getExceptionParameters(group1, particle1, particle2, sigma1, epsilon1);
force.getExceptionParameters(group2, particle1, particle2, sigma2, epsilon2);
return (sigma1 == sigma2 && epsilon1 == epsilon2);
}
return true;
}
private:
const GayBerneForce& force;
};
class OpenCLCalcGayBerneForceKernel::ReorderListener : public OpenCLContext::ReorderListener {
public:
ReorderListener(OpenCLCalcGayBerneForceKernel& owner) : owner(owner) {
}
void execute() {
owner.sortAtoms();
}
private:
OpenCLCalcGayBerneForceKernel& owner;
};
OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
if (sortedParticles != NULL)
delete sortedParticles;
if (axisParticleIndices != NULL)
delete axisParticleIndices;
if (sigParams != NULL)
delete sigParams;
if (epsParams != NULL)
delete epsParams;
if (scale != NULL)
delete scale;
if (exceptionParticles != NULL)
delete exceptionParticles;
if (exceptionParams != NULL)
delete exceptionParams;
if (aMatrix != NULL)
delete aMatrix;
if (bMatrix != NULL)
delete bMatrix;
if (gMatrix != NULL)
delete gMatrix;
if (exclusions != NULL)
delete exclusions;
if (exclusionStartIndex != NULL)
delete exclusionStartIndex;
if (blockCenter != NULL)
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (sortedPos != NULL)
delete sortedPos;
if (torque != NULL)
delete torque;
}
void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBerneForce& force) {
// Initialize interactions.
int numParticles = force.getNumParticles();
sigParams = OpenCLArray::create<mm_float4>(cl, cl.getPaddedNumAtoms(), "sigParams");
epsParams = OpenCLArray::create<mm_float2>(cl, cl.getPaddedNumAtoms(), "epsParams");
scale = OpenCLArray::create<mm_float4>(cl, cl.getPaddedNumAtoms(), "scale");
axisParticleIndices = OpenCLArray::create<mm_int2>(cl, cl.getPaddedNumAtoms(), "axisParticleIndices");
sortedParticles = OpenCLArray::create<cl_int>(cl, cl.getPaddedNumAtoms(), "sortedParticles");
aMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "aMatrix");
bMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "bMatrix");
gMatrix = OpenCLArray::create<cl_float>(cl, 9*cl.getPaddedNumAtoms(), "gMatrix");
vector<mm_float4> sigParamsVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float2> epsParamsVector(cl.getPaddedNumAtoms(), mm_float2(0, 0));
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_int2> axisParticleVector(cl.getPaddedNumAtoms(), mm_int2(0, 0));
isRealParticle.resize(cl.getPaddedNumAtoms());
vector<vector<int> > exclusionList(numParticles);
numRealParticles = 0;
for (int i = 0; i < numParticles; i++) {
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
force.getParticleParameters(i, sigma, epsilon, xparticle, yparticle, sx, sy, sz, ex, ey, ez);
axisParticleVector[i] = mm_int2(xparticle, yparticle);
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
if (epsilon != 0.0) {
numRealParticles++;
isRealParticle[i] = true;
}
else
isRealParticle[i] = false;
exclusionList[i].push_back(i);
}
sigParams->upload(sigParamsVector);
epsParams->upload(epsParamsVector);
scale->upload(scaleVector);
axisParticleIndices->upload(axisParticleVector);
// Create data structures used for the neighbor list.
int numAtomBlocks = (numRealParticles+31)/32;
int elementSize = (cl.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
// Create arrays for accumulating torques.
if (cl.getSupports64BitGlobalAtomics())
torque = OpenCLArray::create<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
else
torque = new OpenCLArray(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "torque");
cl.addAutoclearBuffer(*torque);
// Create the kernels.
nonbondedMethod = force.getNonbondedMethod();
bool useCutoff = (nonbondedMethod != GayBerneForce::NoCutoff);
bool usePeriodic = (nonbondedMethod == GayBerneForce::CutoffPeriodic);
map<string, string> defines;
defines["USE_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
if (useCutoff) {
// Compute the switching coefficients.
if (force.getUseSwitchingFunction()) {
defines["SWITCH_CUTOFF"] = cl.doubleToString(force.getSwitchingDistance());
defines["SWITCH_C3"] = cl.doubleToString(10/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 3.0));
defines["SWITCH_C4"] = cl.doubleToString(15/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 4.0));
defines["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
if (!cl.getSupports64BitGlobalAtomics()) {
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
}
cl::Program program = cl.createProgram(OpenCLKernelSources::gayBerne, defines);
framesKernel = cl::Kernel(program, "computeEllipsoidFrames");
blockBoundsKernel = cl::Kernel(program, "findBlockBounds");
neighborsKernel = cl::Kernel(program, "findNeighbors");
forceKernel = cl::Kernel(program, "computeForce");
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
// cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigParams", "float", 4, sizeof(cl_float4), sigParams->getDeviceBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("epsParams", "float", 2, sizeof(cl_float2), epsParams->getDeviceBuffer()));
// Record exceptions and exclusions.
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
if (epsilon != 0.0)
exceptions.push_back(i);
}
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
// Initialize the exceptions.
int numExceptions = exceptions.size();
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = OpenCLArray::create<mm_float2>(cl, numExceptions, "exceptionParams");
vector<mm_float2> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double sigma, epsilon;
force.getExceptionParameters(exceptions[i], atoms[i][0], atoms[i][1], sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon));
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams->upload(exceptionParamsVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams->getDeviceBuffer(), "float2");
// cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
cl.addForce(new OpenCLGayBerneForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
cl.addReorderListener(new ReorderListener(*this));
}
double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (!hasInitializedKernels) {
hasInitializedKernels = true;
sortAtoms();
framesKernel.setArg<cl_int>(0, numRealParticles);
framesKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(2, axisParticleIndices->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(3, sigParams->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(4, scale->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(5, aMatrix->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(6, bMatrix->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(7, gMatrix->getDeviceBuffer());
framesKernel.setArg<cl::Buffer>(8, sortedParticles->getDeviceBuffer());
blockBoundsKernel.setArg<cl_int>(0, numRealParticles);
blockBoundsKernel.setArg<cl::Buffer>(6, sortedParticles->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(7, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, epsParams->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedParticles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, aMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, bMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
}
cl.executeKernel(framesKernel, cl.getNumAtoms());
setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
cl.executeKernel(blockBoundsKernel, (numRealParticles+31)/32);
if (nonbondedMethod != GayBerneForce::NoCutoff) {
}
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
return 0.0;
}
void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context, const GayBerneForce& force) {
// Make sure the new parameters are acceptable.
if (force.getNumParticles() != cl.getNumAtoms())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (exceptionAtoms.size() > exceptions.size() && make_pair(particle1, particle2) == exceptionAtoms[exceptions.size()])
exceptions.push_back(i);
else if (epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
// Record the per-particle parameters.
vector<mm_float4> sigParamsVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_float2> epsParamsVector(cl.getPaddedNumAtoms(), mm_float2(0, 0));
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
numRealParticles = 0;
for (int i = 0; i < force.getNumParticles(); i++) {
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
force.getParticleParameters(i, sigma, epsilon, xparticle, yparticle, sx, sy, sz, ex, ey, ez);
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.5*sx), (float) (0.5*sy), (float) (0.5*sz));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
if (epsilon != 0.0) {
numRealParticles++;
isRealParticle[i] = true;
}
else
isRealParticle[i] = false;
}
sigParams->upload(sigParamsVector);
epsParams->upload(epsParamsVector);
scale->upload(scaleVector);
// Record the exceptions.
if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<mm_float2> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon));
}
exceptionParams->upload(exceptionParamsVector);
}
cl.invalidateMolecules();
sortAtoms();
}
void OpenCLCalcGayBerneForceKernel::sortAtoms() {
// Sort the list of atoms by type to avoid thread divergence. This is executed every time
// the atoms are reordered.
int nextIndex = 0;
vector<cl_int> particles(cl.getPaddedNumAtoms(), 0);
const vector<int>& order = cl.getAtomIndex();
for (int i = 0; i < cl.getNumAtoms(); i++) {
int atom = order[i];
if (isRealParticle[atom])
particles[nextIndex++] = atom;
}
sortedParticles->upload(particles);
// Rebuild the list of exclusions.
vector<vector<int> > excludedAtoms(numRealParticles);
for (int i = 0; i < excludedPairs.size(); i++) {
int first = min(excludedPairs[i].first, excludedPairs[i].second);
int second = max(excludedPairs[i].first, excludedPairs[i].second);
excludedAtoms[first].push_back(second);
}
int index = 0;
vector<int> exclusionVec(exclusions->getSize());
vector<int> startIndexVec(exclusionStartIndex->getSize());
for (int i = 0; i < numRealParticles; i++) {
startIndexVec[i] = index;
for (int j = 0; j < excludedAtoms[i].size(); j++)
exclusionVec[index++] = excludedAtoms[i][j];
}
startIndexVec[numRealParticles] = index;
exclusions->upload(exclusionVec);
exclusionStartIndex->upload(startIndexVec);
}
OpenCLIntegrateVerletStepKernel::~OpenCLIntegrateVerletStepKernel() {
}
......
......@@ -83,6 +83,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory(CalcCustomCentroidBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomCompoundBondForceKernel::Name(), factory);
registerKernelFactory(CalcCustomManyParticleForceKernel::Name(), factory);
registerKernelFactory(CalcGayBerneForceKernel::Name(), factory);
registerKernelFactory(IntegrateVerletStepKernel::Name(), factory);
registerKernelFactory(IntegrateLangevinStepKernel::Name(), factory);
registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
......
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#define TILE_SIZE 32
/**
* Calculate the ellipsoid coordinate frames and associated matrices.
*/
__kernel void computeEllipsoidFrames(int numParticles, __global const real4* restrict posq, __global int2* const restrict axisParticleIndices,
__global const float4* restrict sigParams, __global const float4* restrict scale, __global real* restrict aMatrix,
__global real* restrict bMatrix, __global real* restrict gMatrix, __global const int* sortedParticles) {
for (int sortedIndex = get_global_id(0); sortedIndex < numParticles; sortedIndex += get_global_size(0)) {
// Compute the local coordinate system of the ellipsoid;
int originalIndex = sortedParticles[sortedIndex];
real3 pos = posq[originalIndex].xyz;
int2 axisParticles = axisParticleIndices[originalIndex];
real3 xdir, ydir, zdir;
if (axisParticles.x == -1) {
xdir = (real3) (1, 0, 0);
ydir = (real3) (0, 1, 0);
}
else {
xdir = pos-posq[axisParticles.x].xyz;
xdir = normalize(xdir);
if (axisParticles.y == -1) {
if (xdir.y > -0.5f && xdir.y < 0.5f)
ydir = (real3) (0, 1, 0);
else
ydir = (real3) (1, 0, 0);
}
else
ydir = pos-posq[axisParticles.y].xyz;
ydir -= xdir*dot(xdir, ydir);
ydir = normalize(ydir);
}
zdir = cross(xdir, ydir);
// Compute matrices we will need later.
__global real (*a)[3] = (__global real (*)[3]) (aMatrix+sortedIndex*9);
__global real (*b)[3] = (__global real (*)[3]) (bMatrix+sortedIndex*9);
__global real (*g)[3] = (__global real (*)[3]) (gMatrix+sortedIndex*9);
a[0][0] = xdir.x;
a[0][1] = xdir.y;
a[0][2] = xdir.z;
a[1][0] = ydir.x;
a[1][1] = ydir.y;
a[1][2] = ydir.z;
a[2][0] = zdir.x;
a[2][1] = zdir.y;
a[2][2] = zdir.z;
float4 sig = sigParams[originalIndex];
float3 r2 = sig.yzw;
float3 e2 = scale[originalIndex].xyz;
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) {
real belem = 0;
real gelem = 0;
for (int k = 0; k < 3; k++) {
belem += a[k][i]*e2[k]*a[k][j];
gelem += a[k][i]*r2[k]*a[k][j];
}
b[i][j] = belem;
g[i][j] = gelem;
}
}
}
/**
* Find a bounding box for the atoms in each block.
*/
__kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const int* sortedAtoms, __global const real4* restrict posq, __global real4* restrict sortedPos, __global real4* restrict blockCenter,
__global real4* restrict blockBoundingBox) {
int index = get_global_id(0);
int base = index*TILE_SIZE;
while (base < numAtoms) {
real4 pos = posq[sortedAtoms[base]];
sortedPos[base] = pos;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_POS(pos)
#endif
real4 minPos = pos;
real4 maxPos = pos;
int last = min(base+TILE_SIZE, numAtoms);
for (int i = base+1; i < last; i++) {
pos = posq[sortedAtoms[i]];
sortedPos[i] = pos;
#ifdef USE_PERIODIC
real4 center = 0.5f*(maxPos+minPos);
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
#endif
minPos = min(minPos, pos);
maxPos = max(maxPos, pos);
}
real4 blockSize = 0.5f*(maxPos-minPos);
blockBoundingBox[index] = blockSize;
blockCenter[index] = 0.5f*(maxPos+minPos);
index += get_global_size(0);
base = index*TILE_SIZE;
}
}
/**
* Build a list of neighbors for each atom.
*/
__kernel void findNeighbors(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const int* sortedAtoms, __global const real4* restrict posq, __global real4* restrict sortedPos, __global real4* restrict blockCenter,
__global real4* restrict blockBoundingBox) {
}
typedef struct {
float4 sig;
float2 eps;
real3 pos;
real a[3][3], b[3][3], g[3][3];
} AtomData;
void loadAtomData(AtomData* data, int sortedIndex, int originalIndex, __global const real4* restrict pos, __global const float4* restrict sigParams,
__global const float2* restrict epsParams, __global const real* restrict aMatrix, __global const real* restrict bMatrix, __global const real* restrict gMatrix) {
data->sig = sigParams[originalIndex];
data->eps = epsParams[originalIndex];
data->pos = pos[sortedIndex].xyz;
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++) {
int k = 9*sortedIndex+3*i+j;
data->a[i][j] = aMatrix[k];
data->b[i][j] = bMatrix[k];
data->g[i][j] = gMatrix[k];
}
}
real3 matrixVectorProduct(real (*m)[3], real3 v) {
return (real3) (m[0][0]*v[0] + m[0][1]*v[1] + m[0][2]*v[2],
m[1][0]*v[0] + m[1][1]*v[1] + m[1][2]*v[2],
m[2][0]*v[0] + m[2][1]*v[1] + m[2][2]*v[2]);
}
real3 vectorMatrixProduct(real3 v, real (*m)[3]) {
return (real3) (m[0][0]*v[0] + m[1][0]*v[1] + m[2][0]*v[2],
m[0][1]*v[0] + m[1][1]*v[1] + m[2][1]*v[2],
m[0][2]*v[0] + m[1][2]*v[1] + m[2][2]*v[2]);
}
void matrixSum(real (*result)[3], real (*a)[3], real (*b)[3]) {
result[0][0] = a[0][0]+b[0][0];
result[0][1] = a[0][1]+b[0][1];
result[0][2] = a[0][2]+b[0][2];
result[1][0] = a[1][0]+b[1][0];
result[1][1] = a[1][1]+b[1][1];
result[1][2] = a[1][2]+b[1][2];
result[2][0] = a[2][0]+b[2][0];
result[2][1] = a[2][1]+b[2][1];
result[2][2] = a[2][2]+b[2][2];
}
real determinant(real (*m)[3]) {
return (m[0][0]*m[1][1]*m[2][2] + m[0][1]*m[1][2]*m[2][0] + m[0][2]*m[1][0]*m[2][1] -
m[0][0]*m[1][2]*m[2][1] - m[0][1]*m[1][0]*m[2][2] - m[0][2]*m[1][1]*m[2][0]);
}
void matrixInverse(real (*result)[3], real (*m)[3]) {
real invDet = RECIP(determinant(m));
result[0][0] = invDet*(m[1][1]*m[2][2] - m[1][2]*m[2][1]);
result[1][0] = -invDet*(m[1][0]*m[2][2] - m[1][2]*m[2][0]);
result[2][0] = invDet*(m[1][0]*m[2][1] - m[1][1]*m[2][0]);
result[0][1] = -invDet*(m[0][1]*m[2][2] - m[0][2]*m[2][1]);
result[1][1] = invDet*(m[0][0]*m[2][2] - m[0][2]*m[2][0]);
result[2][1] = -invDet*(m[0][0]*m[2][1] - m[0][1]*m[2][0]);
result[0][2] = invDet*(m[0][1]*m[1][2] - m[0][2]*m[1][1]);
result[1][2] = -invDet*(m[0][0]*m[1][2] - m[0][2]*m[1][0]);
result[2][2] = invDet*(m[0][0]*m[1][1] - m[0][1]*m[1][0]);
}
void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real epsilon, real3 dr, real r2, real3* force1, real3* force2, real3* torque1, real3* torque2, real *totalEnergy) {
real rInv = RSQRT(r2);
real r = r2*rInv;
real3 drUnit = dr*rInv;
// Compute the switching function.
real switchValue = 1, switchDeriv = 0;
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
}
#endif
// Compute vectors and matrices we'll be needing.
real B12[3][3], G12[3][3], B12inv[3][3], G12inv[3][3];
matrixSum(B12, data1->b, data2->b);
matrixSum(G12, data1->g, data2->g);
matrixInverse(B12inv, B12);
matrixInverse(G12inv, G12);
real detG12 = determinant(G12);
// Estimate the distance between the ellipsoids and compute the first terms needed for the energy.
real sigma12 = 1/SQRT(0.5f*dot(drUnit, matrixVectorProduct(G12inv, drUnit)));
real h12 = r - sigma12;
real rho = sigma/(h12+sigma);
real rho2 = rho*rho;
real rho6 = rho2*rho2*rho2;
real u = 4*epsilon*(rho6*rho6-rho6);
real eta = SQRT(2*data1->eps.y*data2->eps.y/detG12);
real chi = 2*dot(drUnit, matrixVectorProduct(B12inv, drUnit));
chi *= chi;
real energy = u*eta*chi;
// Compute the terms needed for the force.
real3 kappa = matrixVectorProduct(G12inv, dr);
real3 iota = matrixVectorProduct(B12inv, dr);
real rInv2 = rInv*rInv;
real dUSLJdr = 24*epsilon*(2*rho6-1)*rho6*rho/sigma;
real temp = 0.5f*sigma12*sigma12*sigma12*rInv2;
real3 dudr = (drUnit + (kappa-drUnit*dot(kappa, drUnit))*temp)*dUSLJdr;
real3 dchidr = (iota-drUnit*dot(iota, drUnit))*(-8*rInv2*SQRT(chi));
real3 force = (dchidr*u + dudr*chi)*(eta*switchValue) - drUnit*(energy*switchDeriv);
*force1 += force;
*force2 -= force;
// Compute the terms needed for the torque.
for (int j = 0; j < 2; j++) {
real (*a)[3] = (j == 0 ? data1->a : data2->a);
real (*b)[3] = (j == 0 ? data1->b : data2->b);
real (*g)[3] = (j == 0 ? data1->g : data2->g);
float4 sig = (j == 0 ? data1->sig : data2->sig);
real3 dudq = cross(vectorMatrixProduct(kappa, g), kappa*(temp*dUSLJdr));
real3 dchidq = cross(vectorMatrixProduct(iota, b), iota)*(-4*rInv2);
real3 scale = (real3) (sig.y, sig.z, sig.w)*(-0.5f*eta/detG12);
real d[3][3];
d[0][0] = scale[0]*(2*a[0][0]*(G12[1][1]*G12[2][2] - G12[1][2]*G12[2][1]) +
a[0][2]*(G12[1][2]*G12[0][1] + G12[1][0]*G12[2][1] - G12[1][1]*(G12[0][2] + G12[2][0])) +
a[0][1]*(G12[0][2]*G12[2][1] + G12[2][0]*G12[1][2] - G12[2][2]*(G12[0][1] + G12[1][0])));
d[0][1] = scale[0]*( a[0][0]*(G12[0][2]*G12[2][1] + G12[2][0]*G12[1][2] - G12[2][2]*(G12[0][1] + G12[1][0])) +
2*a[0][1]*(G12[0][0]*G12[2][2] - G12[2][0]*G12[0][2]) +
a[0][2]*(G12[1][0]*G12[0][2] + G12[2][0]*G12[0][1] - G12[0][0]*(G12[1][2] + G12[2][1])));
d[0][2] = scale[0]*( a[0][0]*(G12[0][1]*G12[1][2] + G12[1][0]*G12[2][1] - G12[1][1]*(G12[0][2] + G12[2][0])) +
a[0][1]*(G12[1][0]*G12[0][2] + G12[2][0]*G12[0][1] - G12[0][0]*(G12[1][2] + G12[2][1])) +
2*a[0][2]*(G12[1][1]*G12[0][0] - G12[1][0]*G12[0][1]));
d[1][0] = scale[1]*(2*a[1][0]*(G12[1][1]*G12[2][2] - G12[1][2]*G12[2][1]) +
a[1][1]*(G12[0][2]*G12[2][1] + G12[2][0]*G12[1][2] - G12[2][2]*(G12[0][1] + G12[1][0])) +
a[1][2]*(G12[1][2]*G12[0][1] + G12[1][0]*G12[2][1] - G12[1][1]*(G12[0][2] + G12[2][0])));
d[1][1] = scale[1]*( a[1][0]*(G12[0][2]*G12[2][1] + G12[2][0]*G12[1][2] - G12[2][2]*(G12[0][1] + G12[1][0])) +
2*a[1][1]*(G12[2][2]*G12[0][0] - G12[2][0]*G12[0][2]) +
a[1][2]*(G12[1][0]*G12[0][2] + G12[0][1]*G12[2][0] - G12[0][0]*(G12[1][2] + G12[2][1])));
d[1][2] = scale[1]*( a[1][0]*(G12[0][1]*G12[1][2] + G12[1][0]*G12[2][1] - G12[1][1]*(G12[0][2] + G12[2][0])) +
a[1][1]*(G12[1][0]*G12[0][2] + G12[0][1]*G12[2][0] - G12[0][0]*(G12[1][2] + G12[2][1])) +
2*a[1][2]*(G12[1][1]*G12[0][0] - G12[1][0]*G12[0][1]));
d[2][0] = scale[2]*(2*a[2][0]*(G12[1][1]*G12[2][2] - G12[2][1]*G12[1][2]) +
a[2][1]*(G12[0][2]*G12[2][1] + G12[1][2]*G12[2][0] - G12[2][2]*(G12[0][1] + G12[1][0])) +
a[2][2]*(G12[0][1]*G12[1][2] + G12[2][1]*G12[1][0] - G12[1][1]*(G12[0][2] + G12[2][0])));
d[2][1] = scale[2]*( a[2][0]*(G12[0][2]*G12[2][1] + G12[1][2]*G12[2][0] - G12[2][2]*(G12[0][1] + G12[1][0])) +
2*a[2][1]*(G12[0][0]*G12[2][2] - G12[0][2]*G12[2][0]) +
a[2][2]*(G12[1][0]*G12[0][2] + G12[0][1]*G12[2][0] - G12[0][0]*(G12[1][2] + G12[2][1])));
d[2][2] = scale[2]*( a[2][0]*(G12[0][1]*G12[1][2] + G12[2][1]*G12[1][0] - G12[1][1]*(G12[0][2] + G12[2][0])) +
a[2][1]*(G12[1][0]*G12[0][2] + G12[2][0]*G12[0][1] - G12[0][0]*(G12[1][2] + G12[2][1])) +
2*a[2][2]*(G12[1][1]*G12[0][0] - G12[1][0]*G12[0][1]));
real3 detadq;
for (int i = 0; i < 3; i++)
detadq += cross((real3) (a[i][0], a[i][1], a[i][2]), (real3) (d[i][0], d[i][1], d[i][2]));
real3 torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue;
*(j == 0 ? torque1 : torque2) -= torque;
}
*totalEnergy += switchValue*energy;
}
/**
* Compute the interactions.
*/
__kernel void computeForce(
#ifdef SUPPORTS_64_BIT_ATOMICS
__global long* restrict forceBuffers, __global long* restrict torqueBuffers,
#else
__global real4* restrict forceBuffers, __global real4* restrict torqueBuffers,
#endif
int numAtoms, __global mixed* restrict energyBuffer, __global const real4* restrict pos, __global const float4* restrict sigParams,
__global const float2* restrict epsParams, __global const int* restrict sortedAtoms, __global const real* restrict aMatrix,
__global const real* restrict bMatrix, __global const real* restrict gMatrix, __global const int* restrict exclusions,
__global const int* restrict exclusionStartIndex
#ifdef USE_CUTOFF
__global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#endif
) {
const unsigned int warp = get_global_id(0)/TILE_SIZE;
mixed energy = 0;
for (int atom1 = get_global_id(0); atom1 < numAtoms; atom1 += get_global_size(0)) {
// Load parameters for atom1.
int index1 = sortedAtoms[atom1];
AtomData data1;
loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force1 = 0.0f;
real3 torque1 = 0.0f;
int nextExclusion = exclusionStartIndex[atom1];
int lastExclusion = exclusionStartIndex[atom1+1];
for (int atom2 = atom1+1; atom2 < numAtoms; atom2++) {
// Skip over excluded interactions.
if (nextExclusion < lastExclusion && exclusions[nextExclusion] == atom2) {
nextExclusion++;
continue;
}
// Load parameters for atom2.
int index2 = sortedAtoms[atom2];
AtomData data2;
loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force2 = 0.0f;
real3 torque2 = 0.0f;
// Compute the interaction.
real3 delta = data1.pos-data2.pos;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
#else
unsigned int offset = index2 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force2.xyz;
torqueBuffers[offset].xyz += torque2.xyz;
#endif
#ifdef USE_CUTOFF
}
#endif
}
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
#else
unsigned int offset = index1 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force1.xyz;
torqueBuffers[offset].xyz += torque1.xyz;
#endif
}
energyBuffer[get_global_id(0)] += energy;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "OpenCLTests.h"
#include "TestGayBerneForce.h"
void runPlatformTests() {
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment