Commit 522e133e authored by peastman's avatar peastman
Browse files

Continuing OpenCL implementation of GayBerneForce

parent 69cab567
...@@ -1080,9 +1080,10 @@ private: ...@@ -1080,9 +1080,10 @@ private:
class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel { class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public: public:
OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl), OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl),
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), aMatrix(NULL), hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), exceptionParticles(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedPos(NULL), exceptionParams(NULL), aMatrix(NULL),
torque(NULL) { bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighbors(NULL),
neighborIndex(NULL), neighborBlockCount(NULL), sortedPos(NULL), torque(NULL) {
} }
~OpenCLCalcGayBerneForceKernel(); ~OpenCLCalcGayBerneForceKernel();
/** /**
...@@ -1112,7 +1113,7 @@ private: ...@@ -1112,7 +1113,7 @@ private:
void sortAtoms(); void sortAtoms();
OpenCLContext& cl; OpenCLContext& cl;
bool hasInitializedKernels; bool hasInitializedKernels;
int numRealParticles; int numRealParticles, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod; GayBerneForce::NonbondedMethod nonbondedMethod;
OpenCLArray* sortedParticles; OpenCLArray* sortedParticles;
OpenCLArray* axisParticleIndices; OpenCLArray* axisParticleIndices;
...@@ -1128,12 +1129,15 @@ private: ...@@ -1128,12 +1129,15 @@ private:
OpenCLArray* exclusionStartIndex; OpenCLArray* exclusionStartIndex;
OpenCLArray* blockCenter; OpenCLArray* blockCenter;
OpenCLArray* blockBoundingBox; OpenCLArray* blockBoundingBox;
OpenCLArray* neighbors;
OpenCLArray* neighborIndex;
OpenCLArray* neighborBlockCount;
OpenCLArray* sortedPos; OpenCLArray* sortedPos;
OpenCLArray* torque; OpenCLArray* torque;
std::vector<bool> isRealParticle; std::vector<bool> isRealParticle;
std::vector<std::pair<int, int> > exceptionAtoms; std::vector<std::pair<int, int> > exceptionAtoms;
std::vector<std::pair<int, int> > excludedPairs; std::vector<std::pair<int, int> > excludedPairs;
cl::Kernel framesKernel, blockBoundsKernel, neighborsKernel, forceKernel; cl::Kernel framesKernel, blockBoundsKernel, neighborsKernel, forceKernel, torqueKernel;
}; };
/** /**
......
...@@ -6038,6 +6038,12 @@ OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() { ...@@ -6038,6 +6038,12 @@ OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
delete blockCenter; delete blockCenter;
if (blockBoundingBox != NULL) if (blockBoundingBox != NULL)
delete blockBoundingBox; delete blockBoundingBox;
if (neighbors != NULL)
delete neighbors;
if (neighborIndex != NULL)
delete neighborIndex;
if (neighborBlockCount != NULL)
delete neighborBlockCount;
if (sortedPos != NULL) if (sortedPos != NULL)
delete sortedPos; delete sortedPos;
if (torque != NULL) if (torque != NULL)
...@@ -6062,8 +6068,6 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6062,8 +6068,6 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0)); vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_int2> axisParticleVector(cl.getPaddedNumAtoms(), mm_int2(0, 0)); vector<mm_int2> axisParticleVector(cl.getPaddedNumAtoms(), mm_int2(0, 0));
isRealParticle.resize(cl.getPaddedNumAtoms()); isRealParticle.resize(cl.getPaddedNumAtoms());
vector<vector<int> > exclusionList(numParticles);
numRealParticles = 0;
for (int i = 0; i < numParticles; i++) { for (int i = 0; i < numParticles; i++) {
int xparticle, yparticle; int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez; double sigma, epsilon, sx, sy, sz, ex, ey, ez;
...@@ -6072,19 +6076,41 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6072,19 +6076,41 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz)); sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy))); epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0); scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
if (epsilon != 0.0) { isRealParticle[i] = (epsilon != 0.0);
numRealParticles++;
isRealParticle[i] = true;
}
else
isRealParticle[i] = false;
exclusionList[i].push_back(i);
} }
sigParams->upload(sigParamsVector); sigParams->upload(sigParamsVector);
epsParams->upload(epsParamsVector); epsParams->upload(epsParamsVector);
scale->upload(scaleVector); scale->upload(scaleVector);
axisParticleIndices->upload(axisParticleVector); axisParticleIndices->upload(axisParticleVector);
// Record exceptions and exclusions.
vector<mm_float2> exceptionParamsVec;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (epsilon != 0.0) {
exceptionParamsVec.push_back(mm_float2((float) sigma, (float) epsilon));
exceptionAtoms.push_back(make_pair(particle1, particle2));
isRealParticle[particle1] = true;
isRealParticle[particle2] = true;
}
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
}
numRealParticles = 0;
for (int i = 0; i < isRealParticle.size(); i++)
if (isRealParticle[i])
numRealParticles++;
int numExceptions = exceptionParamsVec.size();
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
exceptionParticles = OpenCLArray::create<mm_int4>(cl, max(1, numExceptions), "exceptionParticles");
exceptionParams = OpenCLArray::create<mm_float2>(cl, max(1, numExceptions), "exceptionParams");
if (numExceptions > 0)
exceptionParams->upload(exceptionParamsVec);
// Create data structures used for the neighbor list. // Create data structures used for the neighbor list.
int numAtomBlocks = (numRealParticles+31)/32; int numAtomBlocks = (numRealParticles+31)/32;
...@@ -6092,6 +6118,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6092,6 +6118,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter"); blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox"); blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos"); sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2;
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex = OpenCLArray::create<mm_int2>(cl, maxNeighborBlocks, "neighbors");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
// Create arrays for accumulating torques. // Create arrays for accumulating torques.
...@@ -6126,48 +6156,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6126,48 +6156,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
blockBoundsKernel = cl::Kernel(program, "findBlockBounds"); blockBoundsKernel = cl::Kernel(program, "findBlockBounds");
neighborsKernel = cl::Kernel(program, "findNeighbors"); neighborsKernel = cl::Kernel(program, "findNeighbors");
forceKernel = cl::Kernel(program, "computeForce"); forceKernel = cl::Kernel(program, "computeForce");
torqueKernel = cl::Kernel(program, "applyTorques");
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
// cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigParams", "float", 4, sizeof(cl_float4), sigParams->getDeviceBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("epsParams", "float", 2, sizeof(cl_float2), epsParams->getDeviceBuffer()));
// Record exceptions and exclusions.
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
if (epsilon != 0.0)
exceptions.push_back(i);
}
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
// Initialize the exceptions.
int numExceptions = exceptions.size();
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = OpenCLArray::create<mm_float2>(cl, numExceptions, "exceptionParams");
vector<mm_float2> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double sigma, epsilon;
force.getExceptionParameters(exceptions[i], atoms[i][0], atoms[i][1], sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon));
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams->upload(exceptionParamsVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams->getDeviceBuffer(), "float2");
// cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
cl.addForce(new OpenCLGayBerneForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force)); cl.addForce(new OpenCLGayBerneForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
cl.addReorderListener(new ReorderListener(*this)); cl.addReorderListener(new ReorderListener(*this));
} }
...@@ -6191,11 +6180,21 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include ...@@ -6191,11 +6180,21 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(11, neighborBlockCount->getDeviceBuffer());
neighborsKernel.setArg<cl_int>(0, numRealParticles);
neighborsKernel.setArg<cl_int>(1, maxNeighborBlocks);
neighborsKernel.setArg<cl::Buffer>(7, sortedPos->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, blockCenter->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(9, blockBoundingBox->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics(); bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0; int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer())); forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles); forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl_int>(index++, exceptionAtoms.size());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer());
...@@ -6206,13 +6205,27 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include ...@@ -6206,13 +6205,27 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
} forceKernel.setArg<cl::Buffer>(index++, exceptionParticles->getDeviceBuffer());
cl.executeKernel(framesKernel, cl.getNumAtoms()); forceKernel.setArg<cl::Buffer>(index++, exceptionParams->getDeviceBuffer());
index = 0;
torqueKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
if (!useLong)
torqueKernel.setArg<cl_int>(index++, cl.getNumForceBuffers());
torqueKernel.setArg<cl_int>(index++, numRealParticles);
torqueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices->getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, sortedParticles->getDeviceBuffer());
}
cl.executeKernel(framesKernel, numRealParticles);
setPeriodicBoxArgs(cl, blockBoundsKernel, 1); setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
cl.executeKernel(blockBoundsKernel, (numRealParticles+31)/32); cl.executeKernel(blockBoundsKernel, (numRealParticles+31)/32);
if (nonbondedMethod != GayBerneForce::NoCutoff) { if (nonbondedMethod != GayBerneForce::NoCutoff) {
setPeriodicBoxArgs(cl, neighborsKernel, 2);
cl.executeKernel(neighborsKernel, numRealParticles);
} }
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize()); cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
cl.executeKernel(torqueKernel, numRealParticles);
return 0.0; return 0.0;
} }
...@@ -6231,10 +6244,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context ...@@ -6231,10 +6244,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
else if (epsilon != 0.0) else if (epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed"); throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
} }
int numContexts = cl.getPlatformData().contexts.size(); int numExceptions = exceptionAtoms.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
// Record the per-particle parameters. // Record the per-particle parameters.
...@@ -6263,14 +6273,14 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context ...@@ -6263,14 +6273,14 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
// Record the exceptions. // Record the exceptions.
if (numExceptions > 0) { if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2)); vector<mm_float2> exceptionParamsVec(numExceptions);
vector<mm_float2> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) { for (int i = 0; i < numExceptions; i++) {
int atom1, atom2;
double sigma, epsilon; double sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], sigma, epsilon); force.getExceptionParameters(exceptions[i], atom1, atom2, sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon)); exceptionParamsVec[i] = mm_float2((float) sigma, (float) epsilon);
} }
exceptionParams->upload(exceptionParamsVector); exceptionParams->upload(exceptionParamsVec);
} }
cl.invalidateMolecules(); cl.invalidateMolecules();
sortAtoms(); sortAtoms();
...@@ -6283,13 +6293,26 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() { ...@@ -6283,13 +6293,26 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
int nextIndex = 0; int nextIndex = 0;
vector<cl_int> particles(cl.getPaddedNumAtoms(), 0); vector<cl_int> particles(cl.getPaddedNumAtoms(), 0);
const vector<int>& order = cl.getAtomIndex(); const vector<int>& order = cl.getAtomIndex();
vector<int> inverseOrder(order.size(), -1);
for (int i = 0; i < cl.getNumAtoms(); i++) { for (int i = 0; i < cl.getNumAtoms(); i++) {
int atom = order[i]; int atom = order[i];
if (isRealParticle[atom]) if (isRealParticle[atom]) {
inverseOrder[atom] = nextIndex;
particles[nextIndex++] = atom; particles[nextIndex++] = atom;
}
} }
sortedParticles->upload(particles); sortedParticles->upload(particles);
// Update the list of exception particles.
int numExceptions = exceptionAtoms.size();
if (numExceptions > 0) {
vector<mm_int4> exceptionParticlesVec(numExceptions);
for (int i = 0; i < numExceptions; i++)
exceptionParticlesVec[i] = mm_int4(exceptionAtoms[i].first, exceptionAtoms[i].second, inverseOrder[exceptionAtoms[i].first], inverseOrder[exceptionAtoms[i].second]);
exceptionParticles->upload(exceptionParticlesVec);
}
// Rebuild the list of exclusions. // Rebuild the list of exclusions.
vector<vector<int> > excludedAtoms(numRealParticles); vector<vector<int> > excludedAtoms(numRealParticles);
......
...@@ -71,7 +71,7 @@ __kernel void computeEllipsoidFrames(int numParticles, __global const real4* res ...@@ -71,7 +71,7 @@ __kernel void computeEllipsoidFrames(int numParticles, __global const real4* res
*/ */
__kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const int* sortedAtoms, __global const real4* restrict posq, __global real4* restrict sortedPos, __global real4* restrict blockCenter, __global const int* sortedAtoms, __global const real4* restrict posq, __global real4* restrict sortedPos, __global real4* restrict blockCenter,
__global real4* restrict blockBoundingBox) { __global real4* restrict blockBoundingBox, __global int* restrict neighborBlockCount) {
int index = get_global_id(0); int index = get_global_id(0);
int base = index*TILE_SIZE; int base = index*TILE_SIZE;
while (base < numAtoms) { while (base < numAtoms) {
...@@ -99,14 +99,16 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri ...@@ -99,14 +99,16 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
index += get_global_size(0); index += get_global_size(0);
base = index*TILE_SIZE; base = index*TILE_SIZE;
} }
if (get_global_id(0) == 0)
*neighborBlockCount = 0;
} }
/** /**
* Build a list of neighbors for each atom. * Build a list of neighbors for each atom.
*/ */
__kernel void findNeighbors(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, __kernel void findNeighbors(int numAtoms, int maxNeighborBlocks, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const int* sortedAtoms, __global const real4* restrict posq, __global real4* restrict sortedPos, __global real4* restrict blockCenter, __global real4* restrict sortedPos, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict neighbors,
__global real4* restrict blockBoundingBox) { __global int2* restrict neighborIndex, __global int* restrict neighborBlockCount) {
} }
typedef struct { typedef struct {
...@@ -281,10 +283,11 @@ __kernel void computeForce( ...@@ -281,10 +283,11 @@ __kernel void computeForce(
#else #else
__global real4* restrict forceBuffers, __global real4* restrict torqueBuffers, __global real4* restrict forceBuffers, __global real4* restrict torqueBuffers,
#endif #endif
int numAtoms, __global mixed* restrict energyBuffer, __global const real4* restrict pos, __global const float4* restrict sigParams, int numAtoms, int numExceptions, __global mixed* restrict energyBuffer, __global const real4* restrict pos,
__global const float2* restrict epsParams, __global const int* restrict sortedAtoms, __global const real* restrict aMatrix, __global const float4* restrict sigParams, __global const float2* restrict epsParams, __global const int* restrict sortedAtoms,
__global const real* restrict bMatrix, __global const real* restrict gMatrix, __global const int* restrict exclusions, __global const real* restrict aMatrix, __global const real* restrict bMatrix, __global const real* restrict gMatrix,
__global const int* restrict exclusionStartIndex __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex,
__global const int4* restrict exceptionParticles, __global const float2* restrict exceptionParams
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...@@ -306,14 +309,14 @@ __kernel void computeForce( ...@@ -306,14 +309,14 @@ __kernel void computeForce(
for (int atom2 = atom1+1; atom2 < numAtoms; atom2++) { for (int atom2 = atom1+1; atom2 < numAtoms; atom2++) {
// Skip over excluded interactions. // Skip over excluded interactions.
if (nextExclusion < lastExclusion && exclusions[nextExclusion] == atom2) { int index2 = sortedAtoms[atom2];
if (nextExclusion < lastExclusion && exclusions[nextExclusion] == index2) {
nextExclusion++; nextExclusion++;
continue; continue;
} }
// Load parameters for atom2. // Load parameters for atom2.
int index2 = sortedAtoms[atom2];
AtomData data2; AtomData data2;
loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix); loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force2 = 0.0f; real3 force2 = 0.0f;
...@@ -361,5 +364,118 @@ __kernel void computeForce( ...@@ -361,5 +364,118 @@ __kernel void computeForce(
torqueBuffers[offset].xyz += torque1.xyz; torqueBuffers[offset].xyz += torque1.xyz;
#endif #endif
} }
// Now compute exceptions.
for (int index = get_global_id(0); index < numExceptions; index += get_global_size(0)) {
int4 atomIndices = exceptionParticles[index];
real2 params = exceptionParams[index];
int index1 = atomIndices.x, index2 = atomIndices.y;
int atom1 = atomIndices.z, atom2 = atomIndices.w;
AtomData data1, data2;
loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force1 = 0, force2 = 0;
real3 torque1 = 0, torque2 = 0;
real3 delta = data1.pos-data2.pos;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
computeOneInteraction(&data1, &data2, params.x, params.y, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
#else
int offset = index1 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force1.xyz;
torqueBuffers[offset].xyz += torque1.xyz;
offset = index2 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force2.xyz;
torqueBuffers[offset].xyz += torque2.xyz;
#endif
#ifdef USE_CUTOFF
}
#endif
}
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
} }
/**
* Convert the torques to forces on the connected particles.
*/
__kernel void applyTorques(
#ifdef SUPPORTS_64_BIT_ATOMICS
__global long* restrict forceBuffers, __global long* restrict torqueBuffers,
#else
__global real4* restrict forceBuffers, __global real4* restrict torqueBuffers, int numBuffers,
#endif
int numParticles, __global const real4* restrict posq, __global int2* const restrict axisParticleIndices,
__global const int* sortedParticles) {
const unsigned int warp = get_global_id(0)/TILE_SIZE;
for (int sortedIndex = get_global_id(0); sortedIndex < numParticles; sortedIndex += get_global_size(0)) {
int originalIndex = sortedParticles[sortedIndex];
real3 pos = posq[originalIndex].xyz;
int2 axisParticles = axisParticleIndices[originalIndex];
if (axisParticles.x != -1) {
// Load the torque.
#ifdef SUPPORTS_64_BIT_ATOMICS
real scale = 1/(real) 0x100000000;
real3 torque = (real3) (scale*torqueBuffers[originalIndex], scale*torqueBuffers[originalIndex+PADDED_NUM_ATOMS], scale*torqueBuffers[originalIndex+2*PADDED_NUM_ATOMS]);
#else
real3 torque = 0;
for (int i = 0; i < numBuffers; i++)
torque += torqueBuffers[originalIndex+i*PADDED_NUM_ATOMS].xyz;
#endif
real3 force = 0, xforce = 0, yforce = 0;
// Apply a force to the x particle.
real3 dx = posq[axisParticles.x].xyz-pos;
real dx2 = dot(dx, dx);
real3 f = cross(torque, dx)/dx2;
xforce += f;
force -= f;
if (axisParticles.y != -1) {
// Apply a force to the y particle. This is based on the component of the torque
// that was not already applied to the x particle.
real3 dy = posq[axisParticles.y].xyz-pos;
real dy2 = dot(dy, dy);
real3 torque2 = dx*dot(torque, dx)/dx2;
f = cross(torque2, dy)/dy2;
yforce += f;
force -= f;
}
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[originalIndex], (long) (force.x*0x100000000));
atom_add(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
atom_add(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
atom_add(&forceBuffers[axisParticles.x], (long) (xforce.x*0x100000000));
atom_add(&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS], (long) (xforce.y*0x100000000));
atom_add(&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS], (long) (xforce.z*0x100000000));
if (axisParticles.y != -1) {
atom_add(&forceBuffers[axisParticles.y], (long) (yforce.x*0x100000000));
atom_add(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], (long) (yforce.y*0x100000000));
atom_add(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], (long) (yforce.z*0x100000000));
}
#else
forceBuffers[originalIndex + warp*PADDED_NUM_ATOMS].xyz += force.xyz;
forceBuffers[axisParticles.x + warp*PADDED_NUM_ATOMS].xyz += xforce.xyz;
if (axisParticles.y != -1)
forceBuffers[axisParticles.y + warp*PADDED_NUM_ATOMS].xyz += yforce.xyz;
#endif
}
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment