Commit 0071c334, authored by peastman and committed by GitHub
Browse files

Merge pull request #1682 from peastman/triclinicerror

Fixed error building neighbor list with triclinic boxes
parents 0305b4b7 5e535679
...@@ -513,6 +513,8 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) { ...@@ -513,6 +513,8 @@ void CudaNonbondedUtilities::createKernelsForGroups(int groups) {
defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(exclusionTiles->getSize()); defines["NUM_TILES_WITH_EXCLUSIONS"] = context.intToString(exclusionTiles->getSize());
if (usePeriodic) if (usePeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
if (context.getBoxIsTriclinic())
defines["TRICLINIC"] = "1";
defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions); defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions);
// Temporarily disable the pair list until we figure out why it's failing on some GPUs. // Temporarily disable the pair list until we figure out why it's failing on some GPUs.
defines["MAX_BITS_FOR_PAIRS"] = "0";//(canUsePairList ? "2" : "0"); defines["MAX_BITS_FOR_PAIRS"] = "0";//(canUsePairList ? "2" : "0");
......
...@@ -255,6 +255,13 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -255,6 +255,13 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y); blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y);
blockDelta.z = max(0.0f, fabs(blockDelta.z)-blockSizeX.z-blockSizeY.z); blockDelta.z = max(0.0f, fabs(blockDelta.z)-blockSizeX.z-blockSizeY.z);
includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < PADDED_CUTOFF_SQUARED); includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < PADDED_CUTOFF_SQUARED);
#ifdef TRICLINIC
// The calculation to find the nearest periodic copy is only guaranteed to work if the nearest copy is less than half a box width away.
// If there's any possibility we might have missed it, do a detailed check.
if (periodicBoxSize.z/2-blockSizeX.z-blockSizeY.z < PADDED_CUTOFF || periodicBoxSize.y/2-blockSizeX.y-blockSizeY.y < PADDED_CUTOFF)
includeBlock2 = true;
#endif
if (includeBlock2) { if (includeBlock2) {
unsigned short y = (unsigned short) sortedBlocks[block2].y; unsigned short y = (unsigned short) sortedBlocks[block2].y;
for (int k = 0; k < numExclusions; k++) for (int k = 0; k < numExclusions; k++)
......
...@@ -511,6 +511,8 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) { ...@@ -511,6 +511,8 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
defines["SIMD_WIDTH"] = context.intToString(context.getSIMDWidth()); defines["SIMD_WIDTH"] = context.intToString(context.getSIMDWidth());
if (usePeriodic) if (usePeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
if (context.getBoxIsTriclinic())
defines["TRICLINIC"] = "1";
defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions); defines["MAX_EXCLUSIONS"] = context.intToString(maxExclusions);
defines["BUFFER_GROUPS"] = (deviceIsCpu ? "4" : "2"); defines["BUFFER_GROUPS"] = (deviceIsCpu ? "4" : "2");
string file = (deviceIsCpu ? OpenCLKernelSources::findInteractingBlocks_cpu : OpenCLKernelSources::findInteractingBlocks); string file = (deviceIsCpu ? OpenCLKernelSources::findInteractingBlocks_cpu : OpenCLKernelSources::findInteractingBlocks);
......
...@@ -105,7 +105,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -105,7 +105,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
__local volatile int* tileStartIndex = workgroupTileIndex+(warpStart/32); __local volatile int* tileStartIndex = workgroupTileIndex+(warpStart/32);
// Loop over blocks. // Loop over blocks.
for (int block1 = startBlockIndex+warpIndex; block1 < startBlockIndex+numBlocks; block1 += totalWarps) { for (int block1 = startBlockIndex+warpIndex; block1 < startBlockIndex+numBlocks; block1 += totalWarps) {
// Load data for this block. Note that all threads in a warp are processing the same block. // Load data for this block. Note that all threads in a warp are processing the same block.
...@@ -158,6 +158,13 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -158,6 +158,13 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y); blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y);
blockDelta.z = max((real) 0, fabs(blockDelta.z)-blockSizeX.z-blockSizeY.z); blockDelta.z = max((real) 0, fabs(blockDelta.z)-blockSizeX.z-blockSizeY.z);
includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < PADDED_CUTOFF_SQUARED); includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < PADDED_CUTOFF_SQUARED);
#ifdef TRICLINIC
// The calculation to find the nearest periodic copy is only guaranteed to work if the nearest copy is less than half a box width away.
// If there's any possibility we might have missed it, do a detailed check.
if (periodicBoxSize.z/2-blockSizeX.z-blockSizeY.z < PADDED_CUTOFF || periodicBoxSize.y/2-blockSizeX.y-blockSizeY.y < PADDED_CUTOFF)
includeBlock2 = true;
#endif
if (includeBlock2) { if (includeBlock2) {
unsigned short y = (unsigned short) sortedBlocks[block2].y; unsigned short y = (unsigned short) sortedBlocks[block2].y;
for (int k = 0; k < numExclusions; k++) for (int k = 0; k < numExclusions; k++)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment