Commit 3250fb43 authored by peastman's avatar peastman
Browse files

Continuing OpenCL implementation of GayBerneForce

parent 522e133e
...@@ -6120,7 +6120,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6120,7 +6120,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos"); sortedPos = new OpenCLArray(cl, numRealParticles, 4*elementSize, "sortedPos");
maxNeighborBlocks = numRealParticles*2; maxNeighborBlocks = numRealParticles*2;
neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors"); neighbors = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks*32, "neighbors");
neighborIndex = OpenCLArray::create<mm_int2>(cl, maxNeighborBlocks, "neighbors"); neighborIndex = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighbors");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount"); neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
// Create arrays for accumulating torques. // Create arrays for accumulating torques.
...@@ -6128,7 +6128,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6128,7 +6128,7 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
if (cl.getSupports64BitGlobalAtomics()) if (cl.getSupports64BitGlobalAtomics())
torque = OpenCLArray::create<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque"); torque = OpenCLArray::create<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
else else
torque = new OpenCLArray(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), elementSize, "torque"); torque = new OpenCLArray(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), 4*elementSize, "torque");
cl.addAutoclearBuffer(*torque); cl.addAutoclearBuffer(*torque);
// Create the kernels. // Create the kernels.
...@@ -6138,14 +6138,20 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe ...@@ -6138,14 +6138,20 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
bool usePeriodic = (nonbondedMethod == GayBerneForce::CutoffPeriodic); bool usePeriodic = (nonbondedMethod == GayBerneForce::CutoffPeriodic);
map<string, string> defines; map<string, string> defines;
defines["USE_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0"); defines["USE_SWITCH"] = (useCutoff && force.getUseSwitchingFunction() ? "1" : "0");
double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
if (useCutoff) { if (useCutoff) {
defines["USE_CUTOFF"] = 1;
if (usePeriodic)
defines["USE_PERIODIC"] = "1";
// Compute the switching coefficients. // Compute the switching coefficients.
if (force.getUseSwitchingFunction()) { if (force.getUseSwitchingFunction()) {
defines["SWITCH_CUTOFF"] = cl.doubleToString(force.getSwitchingDistance()); defines["SWITCH_CUTOFF"] = cl.doubleToString(force.getSwitchingDistance());
defines["SWITCH_C3"] = cl.doubleToString(10/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 3.0)); defines["SWITCH_C3"] = cl.doubleToString(10/pow(force.getSwitchingDistance()-cutoff, 3.0));
defines["SWITCH_C4"] = cl.doubleToString(15/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 4.0)); defines["SWITCH_C4"] = cl.doubleToString(15/pow(force.getSwitchingDistance()-cutoff, 4.0));
defines["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0)); defines["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-cutoff, 5.0));
} }
} }
if (!cl.getSupports64BitGlobalAtomics()) { if (!cl.getSupports64BitGlobalAtomics()) {
...@@ -6189,6 +6195,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include ...@@ -6189,6 +6195,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(10, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(11, neighborIndex->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(13, exclusions->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(14, exclusionStartIndex->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics(); bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0; int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer())); forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
...@@ -6207,6 +6215,12 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include ...@@ -6207,6 +6215,12 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exclusionStartIndex->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParticles->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exceptionParticles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, exceptionParams->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, exceptionParams->getDeviceBuffer());
if (nonbondedMethod != GayBerneForce::NoCutoff) {
forceKernel.setArg<cl_int>(index++, maxNeighborBlocks);
forceKernel.setArg<cl::Buffer>(index++, neighbors->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborIndex->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborBlockCount->getDeviceBuffer());
}
index = 0; index = 0;
torqueKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer())); torqueKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer()); torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
...@@ -6223,6 +6237,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include ...@@ -6223,6 +6237,7 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
if (nonbondedMethod != GayBerneForce::NoCutoff) { if (nonbondedMethod != GayBerneForce::NoCutoff) {
setPeriodicBoxArgs(cl, neighborsKernel, 2); setPeriodicBoxArgs(cl, neighborsKernel, 2);
cl.executeKernel(neighborsKernel, numRealParticles); cl.executeKernel(neighborsKernel, numRealParticles);
setPeriodicBoxArgs(cl, forceKernel, 20);
} }
cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize()); cl.executeKernel(forceKernel, cl.getNonbondedUtilities().getNumForceThreadBlocks()*cl.getNonbondedUtilities().getForceThreadBlockSize());
cl.executeKernel(torqueKernel, numRealParticles); cl.executeKernel(torqueKernel, numRealParticles);
...@@ -6317,8 +6332,8 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() { ...@@ -6317,8 +6332,8 @@ void OpenCLCalcGayBerneForceKernel::sortAtoms() {
vector<vector<int> > excludedAtoms(numRealParticles); vector<vector<int> > excludedAtoms(numRealParticles);
for (int i = 0; i < excludedPairs.size(); i++) { for (int i = 0; i < excludedPairs.size(); i++) {
int first = min(excludedPairs[i].first, excludedPairs[i].second); int first = inverseOrder[min(excludedPairs[i].first, excludedPairs[i].second)];
int second = max(excludedPairs[i].first, excludedPairs[i].second); int second = inverseOrder[max(excludedPairs[i].first, excludedPairs[i].second)];
excludedAtoms[first].push_back(second); excludedAtoms[first].push_back(second);
} }
int index = 0; int index = 0;
......
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#define TILE_SIZE 32 #define TILE_SIZE 32
#define NEIGHBOR_BLOCK_SIZE 32
/** /**
* Calculate the ellipsoid coordinate frames and associated matrices. * Calculate the ellipsoid coordinate frames and associated matrices.
...@@ -103,12 +104,79 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri ...@@ -103,12 +104,79 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
*neighborBlockCount = 0; *neighborBlockCount = 0;
} }
/**
* This is called by findNeighbors() to write a block to the neighbor list.
*/
void storeNeighbors(int atom1, int* neighborBuffer, int numAtomsInBuffer, int maxNeighborBlocks, __global int* restrict neighbors,
__global int* restrict neighborIndex, __global int* restrict neighborBlockCount) {
int blockIndex = atom_add(neighborBlockCount, 1);
if (blockIndex >= maxNeighborBlocks)
return; // We don't have enough room for the neighbor list.
neighborIndex[blockIndex] = atom1;
int baseIndex = blockIndex*NEIGHBOR_BLOCK_SIZE;
for (int i = 0; i < numAtomsInBuffer; i++)
neighbors[baseIndex+i] = neighborBuffer[i];
for (int i = numAtomsInBuffer; i < NEIGHBOR_BLOCK_SIZE; i++)
neighbors[baseIndex+i] = -1;
}
/** /**
* Build a list of neighbors for each atom. * Build a list of neighbors for each atom.
*/ */
__kernel void findNeighbors(int numAtoms, int maxNeighborBlocks, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, __kernel void findNeighbors(int numAtoms, int maxNeighborBlocks, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global real4* restrict sortedPos, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict neighbors, __global real4* restrict sortedPos, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict neighbors,
__global int2* restrict neighborIndex, __global int* restrict neighborBlockCount) { __global int* restrict neighborIndex, __global int* restrict neighborBlockCount, __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex) {
const int numBlocks = (numAtoms+TILE_SIZE-1)/TILE_SIZE;
int neighborBuffer[NEIGHBOR_BLOCK_SIZE];
for (int atom1 = get_global_id(0); atom1 < numAtoms; atom1 += get_global_size(0)) {
int nextExclusion = exclusionStartIndex[atom1];
int lastExclusion = exclusionStartIndex[atom1+1];
real4 pos = sortedPos[atom1];
int nextBufferIndex = 0;
// Loop over atom blocks and compute the distance of this atom from each one's bounding box.
for (int block = (atom1+1)/TILE_SIZE; block < numBlocks; block++) {
real4 center = blockCenter[block];
real4 blockSize = blockBoundingBox[block];
real4 blockDelta = center-pos;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif
blockDelta.x = max((real) 0, fabs(blockDelta.x)-blockSize.x);
blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSize.y);
blockDelta.z = max((real) 0, fabs(blockDelta.z)-blockSize.z);
if (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z >= CUTOFF_SQUARED)
continue;
// Loop over atoms within this block.
int first = max(block*TILE_SIZE, atom1+1);
int last = min((block+1)*TILE_SIZE, numAtoms);
for (int atom2 = first; atom2 < last; atom2++) {
// Skip over excluded interactions.
if (nextExclusion < lastExclusion && exclusions[nextExclusion] >= atom2) {
nextExclusion++;
continue;
}
real4 delta = pos-sortedPos[atom2];
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
if (r2 < CUTOFF_SQUARED) {
neighborBuffer[nextBufferIndex++] = atom2;
if (nextBufferIndex == NEIGHBOR_BLOCK_SIZE) {
storeNeighbors(atom1, neighborBuffer, nextBufferIndex, maxNeighborBlocks, neighbors, neighborIndex, neighborBlockCount);
nextBufferIndex = 0;
}
}
}
}
if (nextBufferIndex > 0)
storeNeighbors(atom1, neighborBuffer, nextBufferIndex, maxNeighborBlocks, neighbors, neighborIndex, neighborBlockCount);
}
} }
typedef struct { typedef struct {
...@@ -184,11 +252,11 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep ...@@ -184,11 +252,11 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
// Compute the switching function. // Compute the switching function.
real switchValue = 1, switchDeriv = 0; real switchValue = 1, switchDeriv = 0;
#if USE_LJ_SWITCH #if USE_SWITCH
if (r > LJ_SWITCH_CUTOFF) { if (r > SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF; real x = r-SWITCH_CUTOFF;
switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5)); switchValue = 1+x*x*x*(SWITCH_C3+x*(SWITCH_C4+x*SWITCH_C5));
switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5)); switchDeriv = x*x*(3*SWITCH_C3+x*(4*SWITCH_C4+x*5*SWITCH_C5));
} }
#endif #endif
...@@ -265,7 +333,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep ...@@ -265,7 +333,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
d[2][2] = scale[2]*( a[2][0]*(G12[0][1]*G12[1][2] + G12[2][1]*G12[1][0] - G12[1][1]*(G12[0][2] + G12[2][0])) + d[2][2] = scale[2]*( a[2][0]*(G12[0][1]*G12[1][2] + G12[2][1]*G12[1][0] - G12[1][1]*(G12[0][2] + G12[2][0])) +
a[2][1]*(G12[1][0]*G12[0][2] + G12[2][0]*G12[0][1] - G12[0][0]*(G12[1][2] + G12[2][1])) + a[2][1]*(G12[1][0]*G12[0][2] + G12[2][0]*G12[0][1] - G12[0][0]*(G12[1][2] + G12[2][1])) +
2*a[2][2]*(G12[1][1]*G12[0][0] - G12[1][0]*G12[0][1])); 2*a[2][2]*(G12[1][1]*G12[0][0] - G12[1][0]*G12[0][1]));
real3 detadq; real3 detadq = 0;
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
detadq += cross((real3) (a[i][0], a[i][1], a[i][2]), (real3) (d[i][0], d[i][1], d[i][2])); detadq += cross((real3) (a[i][0], a[i][1], a[i][2]), (real3) (d[i][0], d[i][1], d[i][2]));
real3 torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue; real3 torque = (dchidq*(u*eta) + detadq*(u*chi) + dudq*(eta*chi))*switchValue;
...@@ -289,13 +357,72 @@ __kernel void computeForce( ...@@ -289,13 +357,72 @@ __kernel void computeForce(
__global const int* restrict exclusions, __global const int* restrict exclusionStartIndex, __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex,
__global const int4* restrict exceptionParticles, __global const float2* restrict exceptionParams __global const int4* restrict exceptionParticles, __global const float2* restrict exceptionParams
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , int maxNeighborBlocks, __global int* restrict neighbors, __global int* restrict neighborIndex, __global int* restrict neighborBlockCount,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#endif #endif
) { ) {
const unsigned int warp = get_global_id(0)/TILE_SIZE; const unsigned int warp = get_global_id(0)/TILE_SIZE;
mixed energy = 0; mixed energy = 0;
#ifdef USE_CUTOFF
const int numBlocks = *neighborBlockCount;
for (int block = get_global_id(0); block < numBlocks; block += get_global_size(0)) {
// Load parameters for atom1.
int atom1 = neighborIndex[block];
int index1 = sortedAtoms[atom1];
AtomData data1;
loadAtomData(&data1, atom1, index1, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force1 = 0.0f;
real3 torque1 = 0.0f;
for (int indexInBlock = 0; indexInBlock < NEIGHBOR_BLOCK_SIZE; indexInBlock++) {
// Load parameters for atom2.
int atom2 = neighbors[NEIGHBOR_BLOCK_SIZE*block+indexInBlock];
if (atom2 == -1)
continue;
int index2 = sortedAtoms[atom2];
AtomData data2;
loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force2 = 0.0f;
real3 torque2 = 0.0f;
// Compute the interaction.
real3 delta = data1.pos-data2.pos;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
#else
unsigned int offset = index2 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force2.xyz;
torqueBuffers[offset].xyz += torque2.xyz;
#endif
}
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
#else
unsigned int offset = index1 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force1.xyz;
torqueBuffers[offset].xyz += torque1.xyz;
#endif
}
#else
for (int atom1 = get_global_id(0); atom1 < numAtoms; atom1 += get_global_size(0)) { for (int atom1 = get_global_id(0); atom1 < numAtoms; atom1 += get_global_size(0)) {
// Load parameters for atom1. // Load parameters for atom1.
...@@ -309,14 +436,14 @@ __kernel void computeForce( ...@@ -309,14 +436,14 @@ __kernel void computeForce(
for (int atom2 = atom1+1; atom2 < numAtoms; atom2++) { for (int atom2 = atom1+1; atom2 < numAtoms; atom2++) {
// Skip over excluded interactions. // Skip over excluded interactions.
int index2 = sortedAtoms[atom2]; if (nextExclusion < lastExclusion && exclusions[nextExclusion] == atom2) {
if (nextExclusion < lastExclusion && exclusions[nextExclusion] == index2) {
nextExclusion++; nextExclusion++;
continue; continue;
} }
// Load parameters for atom2. // Load parameters for atom2.
int index2 = sortedAtoms[atom2];
AtomData data2; AtomData data2;
loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix); loadAtomData(&data2, atom2, index2, pos, sigParams, epsParams, aMatrix, bMatrix, gMatrix);
real3 force2 = 0.0f; real3 force2 = 0.0f;
...@@ -325,13 +452,7 @@ __kernel void computeForce( ...@@ -325,13 +452,7 @@ __kernel void computeForce(
// Compute the interaction. // Compute the interaction.
real3 delta = data1.pos-data2.pos; real3 delta = data1.pos-data2.pos;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real sigma = data1.sig.x+data2.sig.x; real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x; real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy); computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
...@@ -346,9 +467,6 @@ __kernel void computeForce( ...@@ -346,9 +467,6 @@ __kernel void computeForce(
unsigned int offset = index2 + warp*PADDED_NUM_ATOMS; unsigned int offset = index2 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force2.xyz; forceBuffers[offset].xyz += force2.xyz;
torqueBuffers[offset].xyz += torque2.xyz; torqueBuffers[offset].xyz += torque2.xyz;
#endif
#ifdef USE_CUTOFF
}
#endif #endif
} }
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
...@@ -364,12 +482,13 @@ __kernel void computeForce( ...@@ -364,12 +482,13 @@ __kernel void computeForce(
torqueBuffers[offset].xyz += torque1.xyz; torqueBuffers[offset].xyz += torque1.xyz;
#endif #endif
} }
#endif
// Now compute exceptions. // Now compute exceptions.
for (int index = get_global_id(0); index < numExceptions; index += get_global_size(0)) { for (int index = get_global_id(0); index < numExceptions; index += get_global_size(0)) {
int4 atomIndices = exceptionParticles[index]; int4 atomIndices = exceptionParticles[index];
real2 params = exceptionParams[index]; float2 params = exceptionParams[index];
int index1 = atomIndices.x, index2 = atomIndices.y; int index1 = atomIndices.x, index2 = atomIndices.y;
int atom1 = atomIndices.z, atom2 = atomIndices.w; int atom1 = atomIndices.z, atom2 = atomIndices.w;
AtomData data1, data2; AtomData data1, data2;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment