Unverified commit 94d7225b, authored by peastman and committed by GitHub
Browse files

Re-enabled single pair list (#2863)

parent 938afab0
...@@ -498,8 +498,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) { ...@@ -498,8 +498,7 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
if (context.getBoxIsTriclinic()) if (context.getBoxIsTriclinic())
defines["TRICLINIC"] = "1"; defines["TRICLINIC"] = "1";
defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions); defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions);
// Temporarily disable the pair list until we figure out why it's failing on some GPUs. defines["MAX_BITS_FOR_PAIRS"] = (canUsePairList ? (context.getComputeCapability() < 8.0 ? "2" : "4") : "0");
defines["MAX_BITS_FOR_PAIRS"] = "0";//(canUsePairList ? "2" : "0");
CUmodule interactingBlocksProgram = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::findInteractingBlocks, defines); CUmodule interactingBlocksProgram = context.createModule(CudaKernelSources::vectorOps+CudaKernelSources::findInteractingBlocks, defines);
kernels.findBlockBoundsKernel = context.getKernel(interactingBlocksProgram, "findBlockBounds"); kernels.findBlockBoundsKernel = context.getKernel(interactingBlocksProgram, "findBlockBounds");
kernels.sortBoxDataKernel = context.getKernel(interactingBlocksProgram, "sortBoxData"); kernels.sortBoxDataKernel = context.getKernel(interactingBlocksProgram, "sortBoxData");
......
...@@ -85,15 +85,16 @@ __device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsign ...@@ -85,15 +85,16 @@ __device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsign
int count = __popc(flags[i]); int count = __popc(flags[i]);
sum += (count <= MAX_BITS_FOR_PAIRS ? count : 0); sum += (count <= MAX_BITS_FOR_PAIRS ? count : 0);
} }
sumBuffer[indexInWarp] = sum; for (int i = 1; i < 32; i *= 2) {
for (int step = 1; step < 32; step *= 2) { int n = __shfl_up_sync(0xffffffff, sum, i);
int add = (indexInWarp >= step ? sumBuffer[indexInWarp-step] : 0); if (indexInWarp >= i)
sumBuffer[indexInWarp] += add; sum += n;
} }
int pairsToStore = sumBuffer[31]; if (indexInWarp == 31)
if (indexInWarp == 0) pairStartIndex = atomicAdd(singlePairCount,(unsigned int) sum);
pairStartIndex = atomicAdd(singlePairCount, pairsToStore); __syncwarp();
int pairIndex = pairStartIndex + (indexInWarp > 0 ? sumBuffer[indexInWarp-1] : 0); int prevSum = __shfl_up_sync(0xffffffff, sum, 1);
int pairIndex = pairStartIndex + (indexInWarp > 0 ? prevSum : 0);
for (int i = indexInWarp; i < length; i += 32) { for (int i = indexInWarp; i < length; i += 32) {
int count = __popc(flags[i]); int count = __popc(flags[i]);
if (count <= MAX_BITS_FOR_PAIRS && pairIndex+count < maxSinglePairs) { if (count <= MAX_BITS_FOR_PAIRS && pairIndex+count < maxSinglePairs) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment