"plugins/vscode:/vscode.git/clone" did not exist on "bbbc31604210d617dfb897eced8654b1023dbca3"
Commit b11c8061 authored by peastman
Browse files

Bug fixes to CUDA version of CustomManyParticleForce

parent 3134fbb9
...@@ -4971,6 +4971,8 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool ...@@ -4971,6 +4971,8 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
startIndicesArgs.push_back(&numNeighborsForAtom->getDevicePointer()); startIndicesArgs.push_back(&numNeighborsForAtom->getDevicePointer());
startIndicesArgs.push_back(&neighborStartIndex->getDevicePointer()); startIndicesArgs.push_back(&neighborStartIndex->getDevicePointer());
startIndicesArgs.push_back(&numNeighborPairs->getDevicePointer());
startIndicesArgs.push_back(&maxNeighborPairs);
// Set arguments for the kernel to assemble the final neighbor list. // Set arguments for the kernel to assemble the final neighbor list.
......
...@@ -281,8 +281,17 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi ...@@ -281,8 +281,17 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
* Sum the neighbor counts to compute the start position of each atom. This kernel * Sum the neighbor counts to compute the start position of each atom. This kernel
* is executed as a single work group. * is executed as a single work group.
*/ */
extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeighborsForAtom, int* __restrict__ neighborStartIndex) { extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeighborsForAtom, int* __restrict__ neighborStartIndex,
int* __restrict__ numNeighborPairs, int maxNeighborPairs) {
extern __shared__ unsigned int posBuffer[]; extern __shared__ unsigned int posBuffer[];
if (*numNeighborPairs > maxNeighborPairs) {
// There wasn't enough memory for the neighbor list, so we'll need to rebuild it. Set the neighbor start
// indices to indicate no neighbors for any atom.
for (int i = threadIdx.x; i <= NUM_ATOMS; i += blockDim.x)
neighborStartIndex[i] = 0;
return;
}
unsigned int globalOffset = 0; unsigned int globalOffset = 0;
for (unsigned int startAtom = 0; startAtom < NUM_ATOMS; startAtom += blockDim.x) { for (unsigned int startAtom = 0; startAtom < NUM_ATOMS; startAtom += blockDim.x) {
// Load the neighbor counts into local memory. // Load the neighbor counts into local memory.
...@@ -302,9 +311,10 @@ extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeig ...@@ -302,9 +311,10 @@ extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeig
// Write the results back to global memory. // Write the results back to global memory.
if (globalIndex < NUM_ATOMS) if (globalIndex < NUM_ATOMS) {
neighborStartIndex[globalIndex+1] = posBuffer[threadIdx.x]+globalOffset; neighborStartIndex[globalIndex+1] = posBuffer[threadIdx.x]+globalOffset;
numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter
}
globalOffset += posBuffer[blockDim.x-1]; globalOffset += posBuffer[blockDim.x-1];
} }
if (threadIdx.x == 0) if (threadIdx.x == 0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment