Commit b11c8061 authored by peastman
Browse files

Bug fixes to CUDA version of CustomManyParticleForce

parent 3134fbb9
......@@ -4971,6 +4971,8 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
startIndicesArgs.push_back(&numNeighborsForAtom->getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex->getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs->getDevicePointer());
startIndicesArgs.push_back(&maxNeighborPairs);
// Set arguments for the kernel to assemble the final neighbor list.
......
......@@ -281,8 +281,17 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
* Sum the neighbor counts to compute the start position of each atom. This kernel
* is executed as a single work group.
*/
extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeighborsForAtom, int* __restrict__ neighborStartIndex) {
extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeighborsForAtom, int* __restrict__ neighborStartIndex,
int* __restrict__ numNeighborPairs, int maxNeighborPairs) {
extern __shared__ unsigned int posBuffer[];
if (*numNeighborPairs > maxNeighborPairs) {
// There wasn't enough memory for the neighbor list, so we'll need to rebuild it. Set the neighbor start
// indices to indicate no neighbors for any atom.
for (int i = threadIdx.x; i <= NUM_ATOMS; i += blockDim.x)
neighborStartIndex[i] = 0;
return;
}
unsigned int globalOffset = 0;
for (unsigned int startAtom = 0; startAtom < NUM_ATOMS; startAtom += blockDim.x) {
// Load the neighbor counts into local memory.
......@@ -302,9 +311,10 @@ extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeig
// Write the results back to global memory.
if (globalIndex < NUM_ATOMS)
if (globalIndex < NUM_ATOMS) {
neighborStartIndex[globalIndex+1] = posBuffer[threadIdx.x]+globalOffset;
numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter
numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter
}
globalOffset += posBuffer[blockDim.x-1];
}
if (threadIdx.x == 0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment