"plugins/vscode:/vscode.git/clone" did not exist on "af51d7518132cd329b986cef57520fcc5ddbf7d5"
Commit 5ecc8e00 authored by Peter Eastman's avatar Peter Eastman
Browse files

OpenCL version of GayBerneForce requires 64 bit atomics

parent f1ca1e6d
......@@ -6051,6 +6051,8 @@ OpenCLCalcGayBerneForceKernel::~OpenCLCalcGayBerneForceKernel() {
}
void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBerneForce& force) {
if (!cl.getSupports64BitGlobalAtomics())
throw OpenMMException("GayBerneForce requires a device that supports 64 bit atomic operations");
// Initialize interactions.
......@@ -6123,12 +6125,9 @@ void OpenCLCalcGayBerneForceKernel::initialize(const System& system, const GayBe
neighborIndex = OpenCLArray::create<cl_int>(cl, maxNeighborBlocks, "neighbors");
neighborBlockCount = OpenCLArray::create<cl_int>(cl, 1, "neighborBlockCount");
// Create arrays for accumulating torques.
// Create array for accumulating torques.
if (cl.getSupports64BitGlobalAtomics())
torque = OpenCLArray::create<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
else
torque = new OpenCLArray(cl, cl.getPaddedNumAtoms()*cl.getNonbondedUtilities().getNumForceBuffers(), 4*elementSize, "torque");
torque = OpenCLArray::create<cl_long>(cl, 3*cl.getPaddedNumAtoms(), "torque");
cl.addAutoclearBuffer(*torque);
// Create the kernels.
......@@ -6195,9 +6194,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
neighborsKernel.setArg<cl::Buffer>(12, neighborBlockCount->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(13, exclusions->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(14, exclusionStartIndex->getDeviceBuffer());
bool useLong = cl.getSupports64BitGlobalAtomics();
int index = 0;
forceKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
forceKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
forceKernel.setArg<cl_int>(index++, numRealParticles);
forceKernel.setArg<cl_int>(index++, exceptionAtoms.size());
......@@ -6220,10 +6218,8 @@ double OpenCLCalcGayBerneForceKernel::execute(ContextImpl& context, bool include
forceKernel.setArg<cl::Buffer>(index++, neighborBlockCount->getDeviceBuffer());
}
index = 0;
torqueKernel.setArg<cl::Buffer>(index++, (useLong ? cl.getLongForceBuffer().getDeviceBuffer() : cl.getForceBuffers().getDeviceBuffer()));
torqueKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, torque->getDeviceBuffer());
if (!useLong)
torqueKernel.setArg<cl_int>(index++, cl.getNumForceBuffers());
torqueKernel.setArg<cl_int>(index++, numRealParticles);
torqueKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
torqueKernel.setArg<cl::Buffer>(index++, axisParticleIndices->getDeviceBuffer());
......
#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif
#define TILE_SIZE 32
#define NEIGHBOR_BLOCK_SIZE 32
......@@ -343,11 +341,7 @@ void computeOneInteraction(AtomData* data1, AtomData* data2, real sigma, real ep
* Compute the interactions.
*/
__kernel void computeForce(
#ifdef SUPPORTS_64_BIT_ATOMICS
__global long* restrict forceBuffers, __global long* restrict torqueBuffers,
#else
__global real4* restrict forceBuffers, __global real4* restrict torqueBuffers,
#endif
int numAtoms, int numExceptions, __global mixed* restrict energyBuffer, __global const real4* restrict pos,
__global const float4* restrict sigParams, __global const float2* restrict epsParams, __global const int* restrict sortedAtoms,
__global const real* restrict aMatrix, __global const real* restrict bMatrix, __global const real* restrict gMatrix,
......@@ -395,31 +389,19 @@ __kernel void computeForce(
real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
#else
unsigned int offset = index2 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force2.xyz;
torqueBuffers[offset].xyz += torque2.xyz;
#endif
}
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
#else
unsigned int offset = index1 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force1.xyz;
torqueBuffers[offset].xyz += torque1.xyz;
#endif
}
#else
for (int atom1 = get_global_id(0); atom1 < numAtoms; atom1 += get_global_size(0)) {
......@@ -455,31 +437,19 @@ __kernel void computeForce(
real sigma = data1.sig.x+data2.sig.x;
real epsilon = data1.eps.x*data2.eps.x;
computeOneInteraction(&data1, &data2, sigma, epsilon, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index2], (long) (force2.x*0x100000000));
atom_add(&forceBuffers[index2+PADDED_NUM_ATOMS], (long) (force2.y*0x100000000));
atom_add(&forceBuffers[index2+2*PADDED_NUM_ATOMS], (long) (force2.z*0x100000000));
atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
#else
unsigned int offset = index2 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force2.xyz;
torqueBuffers[offset].xyz += torque2.xyz;
#endif
}
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
atom_add(&torqueBuffers[index1], (long) (torque1.x*0x100000000));
atom_add(&torqueBuffers[index1+PADDED_NUM_ATOMS], (long) (torque1.y*0x100000000));
atom_add(&torqueBuffers[index1+2*PADDED_NUM_ATOMS], (long) (torque1.z*0x100000000));
#else
unsigned int offset = index1 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force1.xyz;
torqueBuffers[offset].xyz += torque1.xyz;
#endif
}
#endif
......@@ -501,7 +471,6 @@ __kernel void computeForce(
if (r2 < CUTOFF_SQUARED) {
#endif
computeOneInteraction(&data1, &data2, params.x, params.y, delta, r2, &force1, &force2, &torque1, &torque2, &energy);
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[index1], (long) (force1.x*0x100000000));
atom_add(&forceBuffers[index1+PADDED_NUM_ATOMS], (long) (force1.y*0x100000000));
atom_add(&forceBuffers[index1+2*PADDED_NUM_ATOMS], (long) (force1.z*0x100000000));
......@@ -514,14 +483,6 @@ __kernel void computeForce(
atom_add(&torqueBuffers[index2], (long) (torque2.x*0x100000000));
atom_add(&torqueBuffers[index2+PADDED_NUM_ATOMS], (long) (torque2.y*0x100000000));
atom_add(&torqueBuffers[index2+2*PADDED_NUM_ATOMS], (long) (torque2.z*0x100000000));
#else
int offset = index1 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force1.xyz;
torqueBuffers[offset].xyz += torque1.xyz;
offset = index2 + warp*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz += force2.xyz;
torqueBuffers[offset].xyz += torque2.xyz;
#endif
#ifdef USE_CUTOFF
}
#endif
......@@ -533,11 +494,7 @@ __kernel void computeForce(
* Convert the torques to forces on the connected particles.
*/
__kernel void applyTorques(
#ifdef SUPPORTS_64_BIT_ATOMICS
__global long* restrict forceBuffers, __global long* restrict torqueBuffers,
#else
__global real4* restrict forceBuffers, __global real4* restrict torqueBuffers, int numBuffers,
#endif
int numParticles, __global const real4* restrict posq, __global int2* const restrict axisParticleIndices,
__global const int* sortedParticles) {
const unsigned int warp = get_global_id(0)/TILE_SIZE;
......@@ -548,14 +505,8 @@ __kernel void applyTorques(
if (axisParticles.x != -1) {
// Load the torque.
#ifdef SUPPORTS_64_BIT_ATOMICS
real scale = 1/(real) 0x100000000;
real3 torque = (real3) (scale*torqueBuffers[originalIndex], scale*torqueBuffers[originalIndex+PADDED_NUM_ATOMS], scale*torqueBuffers[originalIndex+2*PADDED_NUM_ATOMS]);
#else
real3 torque = 0;
for (int i = 0; i < numBuffers; i++)
torque += torqueBuffers[originalIndex+i*PADDED_NUM_ATOMS].xyz;
#endif
real3 force = 0, xforce = 0, yforce = 0;
// Apply a force to the x particle.
......@@ -576,7 +527,6 @@ __kernel void applyTorques(
yforce += f;
force -= f;
}
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[originalIndex], (long) (force.x*0x100000000));
atom_add(&forceBuffers[originalIndex+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
atom_add(&forceBuffers[originalIndex+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
......@@ -588,12 +538,6 @@ __kernel void applyTorques(
atom_add(&forceBuffers[axisParticles.y+PADDED_NUM_ATOMS], (long) (yforce.y*0x100000000));
atom_add(&forceBuffers[axisParticles.y+2*PADDED_NUM_ATOMS], (long) (yforce.z*0x100000000));
}
#else
forceBuffers[originalIndex + warp*PADDED_NUM_ATOMS].xyz += force.xyz;
forceBuffers[axisParticles.x + warp*PADDED_NUM_ATOMS].xyz += xforce.xyz;
if (axisParticles.y != -1)
forceBuffers[axisParticles.y + warp*PADDED_NUM_ATOMS].xyz += yforce.xyz;
#endif
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment