Commit 522e133e authored by peastman's avatar peastman
Browse files

Continuing OpenCL implementation of GayBerneForce

parent 69cab567
......@@ -1080,9 +1080,10 @@ private:
class OpenCLCalcGayBerneForceKernel : public CalcGayBerneForceKernel {
public:
OpenCLCalcGayBerneForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGayBerneForceKernel(name, platform), cl(cl),
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), aMatrix(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), sortedPos(NULL),
torque(NULL) {
hasInitializedKernels(false), sortedParticles(NULL), axisParticleIndices(NULL), sigParams(NULL), epsParams(NULL), scale(NULL), exceptionParticles(NULL),
exceptionParams(NULL), aMatrix(NULL),
bMatrix(NULL), gMatrix(NULL), exclusions(NULL), exclusionStartIndex(NULL), blockCenter(NULL), blockBoundingBox(NULL), neighbors(NULL),
neighborIndex(NULL), neighborBlockCount(NULL), sortedPos(NULL), torque(NULL) {
}
~OpenCLCalcGayBerneForceKernel();
/**
......@@ -1112,7 +1113,7 @@ private:
void sortAtoms();
OpenCLContext& cl;
bool hasInitializedKernels;
int numRealParticles;
int numRealParticles, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod;
OpenCLArray* sortedParticles;
OpenCLArray* axisParticleIndices;
......@@ -1128,12 +1129,15 @@ private:
OpenCLArray* exclusionStartIndex;
OpenCLArray* blockCenter;
OpenCLArray* blockBoundingBox;
OpenCLArray* neighbors;
OpenCLArray* neighborIndex;
OpenCLArray* neighborBlockCount;
OpenCLArray* sortedPos;
OpenCLArray* torque;
std::vector<bool> isRealParticle;
std::vector<std::pair<int, int> > exceptionAtoms;
std::vector<std::pair<int, int> > excludedPairs;
cl::Kernel framesKernel, blockBoundsKernel, neighborsKernel, forceKernel;
cl::Kernel framesKernel, blockBoundsKernel, neighborsKernel, forceKernel, torqueKernel;
};
/**
......
......@@ -6038,6 +6038,12 @@ OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
delete blockCenter;
if (blockBoundingBox != NULL)
delete blockBoundingBox;
if (neighbors != NULL)
delete neighbors;
if (neighborIndex != NULL)
delete neighborIndex;
if (neighborBlockCount != NULL)
delete neighborBlockCount;
if (sortedPos != NULL)
delete sortedPos;
if (torque != NULL)
......@@ -6062,8 +6068,6 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
vector<mm_float4> scaleVector(cl.getPaddedNumAtoms(), mm_float4(0, 0, 0, 0));
vector<mm_int2> axisParticleVector(cl.getPaddedNumAtoms(), mm_int2(0, 0));
isRealParticle.resize(cl.getPaddedNumAtoms());
vector<vector<int> > exclusionList(numParticles);
numRealParticles = 0;
for (int i = 0; i < numParticles; i++) {
int xparticle, yparticle;
double sigma, epsilon, sx, sy, sz, ex, ey, ez;
......@@ -6072,19 +6076,41 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
sigParamsVector[i] = mm_float4((float) (0.5*sigma), (float) (0.25*sx*sx), (float) (0.25*sy*sy), (float) (0.25*sz*sz));
epsParamsVector[i] = mm_float2((float) sqrt(epsilon), (float) (0.125*(sx*sy + sz*sz)*sqrt(sx*sy)));
scaleVector[i] = mm_float4((float) (1/sqrt(ex)), (float) (1/sqrt(ey)), (float) (1/sqrt(ez)), 0);
if (epsilon != 0.0) {
numRealParticles++;
isRealParticle[i] = true;
}
else
isRealParticle[i] = false;
exclusionList[i].push_back(i);
isRealParticle[i] = (epsilon != 0.0);
}
sigParams->upload(sigParamsVector);
epsParams->upload(epsParamsVector);
scale->upload(scaleVector);
axisParticleIndices->upload(axisParticleVector);
// Record exceptions and exclusions.
vector<mm_float2> exceptionParamsVec;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (epsilon != 0.0) {
exceptionParamsVec.push_back(mm_float2((float) sigma, (float) epsilon));
exceptionAtoms.push_back(make_pair(particle1, particle2));
isRealParticle[particle1] = true;
isRealParticle[particle2] = true;
}
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
}
numRealParticles = 0;
for (int i = 0; i < isRealParticle.size(); i++)
if (isRealParticle[i])
numRealParticles++;
int numExceptions = exceptionParamsVec.size();
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
exceptionParticles = OpenCLArray::create<mm_int4>(cl, max(1, numExceptions), "exceptionParticles");
exceptionParams = OpenCLArray::create<mm_float2>(cl, max(1, numExceptions), "exceptionParams");
if (numExceptions > 0)
exceptionParams->upload(exceptionParamsVec);
// Create data structures used for the neighbor list.
int numAtomBlocks = (numRealParticles+31)/32;
......@@ -6092,6 +6118,10 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
blockCenter = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockCenter");
blockBoundingBox = new OpenCLArray(cl, numAtomBlocks, 4*elementSize, "blockBoundingBox");
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2;
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex = OpenCLArray::create<mm_int2>(cl, maxNeighborBlocks, "neighbors");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
// Create arrays for accumulating torques.
......@@ -6126,48 +6156,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
blockBoundsKernel = cl::Kernel(program, "findBlockBounds");
neighborsKernel = cl::Kernel(program, "findNeighbors");
forceKernel = cl::Kernel(program, "computeForce");
// Add the interaction to the default nonbonded kernel.
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
// cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("sigParams", "float", 4, sizeof(cl_float4), sigParams->getDeviceBuffer()));
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo("epsParams", "float", 2, sizeof(cl_float2), epsParams->getDeviceBuffer()));
// Record exceptions and exclusions.
vector<int> exceptions;
for (int i = 0; i < force.getNumExceptions(); i++) {
int particle1, particle2;
double sigma, epsilon;
force.getExceptionParameters(i, particle1, particle2, sigma, epsilon);
if (isRealParticle[particle1] && isRealParticle[particle2])
excludedPairs.push_back(pair<int, int>(particle1, particle2));
if (epsilon != 0.0)
exceptions.push_back(i);
}
exclusions = OpenCLArray::create<cl_int>(cl, max(1, (int) excludedPairs.size()), "exclusions");
exclusionStartIndex = OpenCLArray::create<cl_int>(cl, numRealParticles+1, "exclusionStartIndex");
// Initialize the exceptions.
int numExceptions = exceptions.size();
if (numExceptions > 0) {
exceptionAtoms.resize(numExceptions);
vector<vector<int> > atoms(numExceptions, vector<int>(2));
exceptionParams = OpenCLArray::create<mm_float2>(cl, numExceptions, "exceptionParams");
vector<mm_float2> exceptionParamsVector(numExceptions);
for (int i = 0; i < numExceptions; i++) {
double sigma, epsilon;
force.getExceptionParameters(exceptions[i], atoms[i][0], atoms[i][1], sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon));
exceptionAtoms[i] = make_pair(atoms[i][0], atoms[i][1]);
}
exceptionParams->upload(exceptionParamsVector);
map<string, string> replacements;
replacements["PARAMS"] = cl.getBondedUtilities().addArgument(exceptionParams->getDeviceBuffer(), "float2");
// cl.getBondedUtilities().addInteraction(atoms, cl.replaceStrings(OpenCLKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
torqueKernel = cl::Kernel(program, "applyTorques");
cl.addForce(new OpenCLGayBerneForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
cl.addReorderListener(new ReorderListener(*this));
}
......@@ -6191,11 +6180,21 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
blockBoundsKernel.setArg<cl::Buffer>(8, sortedPos->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(9, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(10, blockBoundingBox->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(11, neighborBlockCount->getDeviceBuffer());
neighborsKernel.setArg<cl_int>(0, numRealParticles);
neighborsKernel.setArg<cl_int>(1, maxNeighborBlocks);
neighborsKernel.setArg<cl::Buffer>(7, sortedPos->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(8, blockCenter->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(9, blockBoundingBox->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl_int>(index++, exceptionAtoms.size());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sortedPos->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, sigParams->getDeviceBuffer());
......@@ -6206,13 +6205,27 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel.setArg<cl::Buffer>(index++, gMatrix->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
}
cl.executeKernel(framesKernel, cl.getNumAtoms());
forceKernel.setArg<cl::Buffer>(index++, exceptionParticles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParams->getDeviceBuffer());
index = 0;
torqueKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
if (!useLong)
torqueKernel.setArg<cl_int>(index++, cl.getNumForceBuffers());
torqueKernel.setArg<cl_int>(index++, numRealParticles);
torqueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices->getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, sortedParticles->getDeviceBuffer());
}
cl.executeKernel(framesKernel, numRealParticles);
setPeriodicBoxArgs(cl, blockBoundsKernel, 1);
cl.executeKernel(blockBoundsKernel, (numRealParticles+31)/32);
if (nonbondedMethod != GayBerneForce::NoCutoff) {
setPeriodicBoxArgs(cl, neighborsKernel, 2);
cl.executeKernel(neighborsKernel, numRealParticles);
}
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
cl.executeKernel(torqueKernel, numRealParticles);
return 0.0;
}
......@@ -6231,10 +6244,7 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
else if (epsilon != 0.0)
throw OpenMMException("updateParametersInContext: The set of non-excluded exceptions has changed");
}
int numContexts = cl.getPlatformData().contexts.size();
int startIndex = cl.getContextIndex()*exceptions.size()/numContexts;
int endIndex = (cl.getContextIndex()+1)*exceptions.size()/numContexts;
int numExceptions = endIndex-startIndex;
int numExceptions = exceptionAtoms.size();
// Record the per-particle parameters.
......@@ -6263,14 +6273,14 @@ void OpenCLCalcGayBerneForceKernel::copyParametersToContext(ContextImpl& context
// Record the exceptions.
if (numExceptions > 0) {
vector<vector<int> > atoms(numExceptions, vector<int>(2));
vector<mm_float2> exceptionParamsVector(numExceptions);
vector<mm_float2> exceptionParamsVec(numExceptions);
for (int i = 0; i < numExceptions; i++) {
int atom1, atom2;
double sigma, epsilon;
force.getExceptionParameters(exceptions[startIndex+i], atoms[i][0], atoms[i][1], sigma, epsilon);
exceptionParamsVector[i] = mm_float2((float) sigma, (float) (4.0*epsilon));
force.getExceptionParameters(exceptions[i], atom1, atom2, sigma, epsilon);
exceptionParamsVec[i] = mm_float2((float) sigma, (float) epsilon);
}
exceptionParams->upload(exceptionParamsVector);
exceptionParams->upload(exceptionParamsVec);
}
cl.invalidateMolecules();
sortAtoms();
......@@ -6283,13 +6293,26 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
int nextIndex = 0;
vector<cl_int> particles(cl.getPaddedNumAtoms(), 0);
const vector<int>& order = cl.getAtomIndex();
vector<int> inverseOrder(order.size(), -1);
for (int i = 0; i < cl.getNumAtoms(); i++) {
int atom = order[i];
if (isRealParticle[atom])
if (isRealParticle[atom]) {
inverseOrder[atom] = nextIndex;
particles[nextIndex++] = atom;
}
}
sortedParticles->upload(particles);
// Update the list of exception particles.
int numExceptions = exceptionAtoms.size();
if (numExceptions > 0) {
vector<mm_int4> exceptionParticlesVec(numExceptions);
for (int i = 0; i < numExceptions; i++)
exceptionParticlesVec[i] = mm_int4(exceptionAtoms[i].first, exceptionAtoms[i].second, inverseOrder[exceptionAtoms[i].first], inverseOrder[exceptionAtoms[i].second]);
exceptionParticles->upload(exceptionParticlesVec);
}
// Rebuild the list of exclusions.
vector<vector<int> > excludedAtoms(numRealParticles);
......
......@@ -71,7 +71,7 @@ __kernel void computeEllipsoidFrames(int numParticles, __global const real4* res
*/
__kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const int* sortedAtoms, __global const real4* restrict posq, __global real4* restrict sortedPos, __global real4* restrict blockCenter,
__global real4* restrict blockBoundingBox) {
__global real4* restrict blockBoundingBox, __global int* restrict neighborBlockCount) {
int index = get_global_id(0);
int base = index*TILE_SIZE;
while (base < numAtoms) {
......@@ -99,14 +99,16 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
index += get_global_size(0);
base = index*TILE_SIZE;
}
if (get_global_id(0) == 0)
*neighborBlockCount = 0;
}
/**
* Build a list of neighbors for each atom.
*/
__kernel void findNeighbors(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const int* sortedAtoms, __global const real4* restrict posq, __global real4* restrict sortedPos, __global real4* restrict blockCenter,
__global real4* restrict blockBoundingBox) {
__kernel void findNeighbors(int numAtoms, int maxNeighborBlocks, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global real4* restrict sortedPos, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict neighbors,
__global int2* restrict neighborIndex, __global int* restrict neighborBlockCount) {
}
typedef struct {
......@@ -281,10 +283,11 @@ __kernel void computeForce(
#else
__global real4* restrict forceBuffers, __global real4* restrict torqueBuffers,
#endif
int numAtoms, __global mixed* restrict energyBuffer, __global const real4* restrict pos, __global const float4* restrict sigParams,
__global const float2* restrict epsParams, __global const int* restrict sortedAtoms, __global const real* restrict aMatrix,
__global const real* restrict bMatrix, __global const real* restrict gMatrix, __global const int* restrict exclusions,
__global const int* restrict exclusionStartIndex
int numAtoms, int numExceptions, __global mixed* restrict energyBuffer, __global const real4* restrict pos,
__global const float4* restrict sigParams, __global const float2* restrict epsParams, __global const int* restrict sortedAtoms,
__global const real* restrict aMatrix, __global const real* restrict bMatrix, __global const real* restrict gMatrix,
__global const int* restrict exclusions, __global const int* restrict exclusionStartIndex,
__global const int4* restrict exceptionParticles, __global const float2* restrict exceptionParams
#ifdef USE_CUTOFF
__global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
......@@ -306,14 +309,14 @@ __kernel void computeForce(
for (int atom2 = atom1+1; atom2 < numAtoms; atom2++) {
// Skip over excluded interactions.
if (nextExclusion < lastExclusion && exclusions[nextExclusion] == atom2) {
int index2 = sortedAtoms[atom2];
if (nextExclusion < lastExclusion && exclusions[nextExclusion] == index2) {
nextExclusion++;
continue;
}
// Load parameters for atom2.
int index2 = sortedAtoms[atom2];
AtomData data2;
loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force2 = 0.0f;
......@@ -361,5 +364,118 @@ __kernel void computeForce(
torqueBuffers[offset].xyz += torque1.xyz;
#endif
}
// Now compute exceptions.
for (int index = get_global_id(0); index < numExceptions; index += get_global_size(0)) {
int4 atomIndices = exceptionParticles[index];
real2 params = exceptionParams[index];
int index1 = atomIndices.x, index2 = atomIndices.y;
int atom1 = atomIndices.z, atom2 = atomIndices.w;
AtomData data1, data2;
loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force1 = 0, force2 = 0;
real3 torque1 = 0, torque2 = 0;
real3 delta = data1.pos-data2.pos;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
computeOneInteraction(&data1, &data2, params.x, params.y, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
#else
int offset = index1 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force1.xyz;
torqueBuffers[offset].xyz += torque1.xyz;
offset = index2 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force2.xyz;
torqueBuffers[offset].xyz += torque2.xyz;
#endif
#ifdef USE_CUTOFF
}
#endif
}
energyBuffer[get_global_id(0)] += energy;
}
/**
* Convert the torques to forces on the connected particles.
*/
__kernel void applyTorques(
#ifdef SUPPORTS_64_BIT_ATOMICS
__global long* restrict forceBuffers, __global long* restrict torqueBuffers,
#else
__global real4* restrict forceBuffers, __global real4* restrict torqueBuffers, int numBuffers,
#endif
int numParticles, __global const real4* restrict posq, __global int2* const restrict axisParticleIndices,
__global const int* sortedParticles) {
const unsigned int warp = get_global_id(0)/TILE_SIZE;
for (int sortedIndex = get_global_id(0); sortedIndex < numParticles; sortedIndex += get_global_size(0)) {
int originalIndex = sortedParticles[sortedIndex];
real3 pos = posq[originalIndex].xyz;
int2 axisParticles = axisParticleIndices[originalIndex];
if (axisParticles.x != -1) {
// Load the torque.
#ifdef SUPPORTS_64_BIT_ATOMICS
real scale = 1/(real) 0x100000000;
real3 torque = (real3) (scale*torqueBuffers[originalIndex], scale*torqueBuffers[originalIndex+PADDED_NUM_ATOMS], scale*torqueBuffers[originalIndex+2*PADDED_NUM_ATOMS]);
#else
real3 torque = 0;
for (int i = 0; i < numBuffers; i++)
torque += torqueBuffers[originalIndex+i*PADDED_NUM_ATOMS].xyz;
#endif
real3 force = 0, xforce = 0, yforce = 0;
// Apply a force to the x particle.
real3 dx = posq[axisParticles.x].xyz-pos;
real dx2 = dot(dx, dx);
real3 f = cross(torque, dx)/dx2;
xforce += f;
force -= f;
if (axisParticles.y != -1) {
// Apply a force to the y particle. This is based on the component of the torque
// that was not already applied to the x particle.
real3 dy = posq[axisParticles.y].xyz-pos;
real dy2 = dot(dy, dy);
real3 torque2 = dx*dot(torque, dx)/dx2;
f = cross(torque2, dy)/dy2;
yforce += f;
force -= f;
}
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[originalIndex], (long) (force.x*0x100000000));
atom_add(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
atom_add(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
atom_add(&forceBuffers[axisParticles.x], (long) (xforce.x*0x100000000));
atom_add(&forceBuffers[axisParticles.x+PADDED_NUM_ATOMS], (long) (xforce.y*0x100000000));
atom_add(&forceBuffers[axisParticles.x+2*PADDED_NUM_ATOMS], (long) (xforce.z*0x100000000));
if (axisParticles.y != -1) {
atom_add(&forceBuffers[axisParticles.y], (long) (yforce.x*0x100000000));
atom_add(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], (long) (yforce.y*0x100000000));
atom_add(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], (long) (yforce.z*0x100000000));
}
#else
forceBuffers[originalIndex + warp*PADDED_NUM_ATOMS].xyz += force.xyz;
forceBuffers[axisParticles.x + warp*PADDED_NUM_ATOMS].xyz += xforce.xyz;
if (axisParticles.y != -1)
forceBuffers[axisParticles.y + warp*PADDED_NUM_ATOMS].xyz += yforce.xyz;
#endif
}
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment