"platforms/reference/src/SimTKReference/GBVIParameters.cpp" did not exist on "8148f51d4e83c7b61b68d99ab055343e50f3e805"
Commit 282c6f2c authored by Yutong Zhao's avatar Yutong Zhao
Browse files

Fixes a hard to catch bug when a boundingBoxSize increases in size in between...

Fixes a hard to catch bug when a boundingBoxSize increases in size in between a timestep, but does not trigger a rebuild of the neighbourlist. Affects the usage of singlePeriodicCopy in nonbonded force.  
parent ce2cd27b
...@@ -2055,6 +2055,7 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor ...@@ -2055,6 +2055,7 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
if (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision()) if (cu.getComputeCapability() >= 3.0 && !cu.getUseDoublePrecision())
defines["ENABLE_SHUFFLE"] = "1"; defines["ENABLE_SHUFFLE"] = "1";
defines["CUTOFF_SQUARED"] = cu.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance()); defines["CUTOFF_SQUARED"] = cu.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["CUTOFF"] = cu.doubleToString(nb.getCutoffDistance());
defines["PREFACTOR"] = cu.doubleToString(prefactor); defines["PREFACTOR"] = cu.doubleToString(prefactor);
defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms()); defines["NUM_ATOMS"] = cu.intToString(cu.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
...@@ -2081,6 +2082,7 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor ...@@ -2081,6 +2082,7 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
computeSumArgs.push_back(cu.getInvPeriodicBoxSizePointer()); computeSumArgs.push_back(cu.getInvPeriodicBoxSizePointer());
computeSumArgs.push_back(&maxTiles); computeSumArgs.push_back(&maxTiles);
computeSumArgs.push_back(&nb.getBlockCenters().getDevicePointer()); computeSumArgs.push_back(&nb.getBlockCenters().getDevicePointer());
computeSumArgs.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
computeSumArgs.push_back(&nb.getInteractingAtoms().getDevicePointer()); computeSumArgs.push_back(&nb.getInteractingAtoms().getDevicePointer());
} }
else else
...@@ -2099,6 +2101,7 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor ...@@ -2099,6 +2101,7 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
force1Args.push_back(cu.getInvPeriodicBoxSizePointer()); force1Args.push_back(cu.getInvPeriodicBoxSizePointer());
force1Args.push_back(&maxTiles); force1Args.push_back(&maxTiles);
force1Args.push_back(&nb.getBlockCenters().getDevicePointer()); force1Args.push_back(&nb.getBlockCenters().getDevicePointer());
force1Args.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
force1Args.push_back(&nb.getInteractingAtoms().getDevicePointer()); force1Args.push_back(&nb.getInteractingAtoms().getDevicePointer());
} }
else else
...@@ -2112,8 +2115,8 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor ...@@ -2112,8 +2115,8 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
maxTiles = nb.getInteractingTiles().getSize(); maxTiles = nb.getInteractingTiles().getSize();
computeSumArgs[3] = &nb.getInteractingTiles().getDevicePointer(); computeSumArgs[3] = &nb.getInteractingTiles().getDevicePointer();
force1Args[5] = &nb.getInteractingTiles().getDevicePointer(); force1Args[5] = &nb.getInteractingTiles().getDevicePointer();
computeSumArgs[9] = &nb.getInteractingAtoms().getDevicePointer(); computeSumArgs[11] = &nb.getInteractingAtoms().getDevicePointer();
force1Args[11] = &nb.getInteractingAtoms().getDevicePointer(); force1Args[12] = &nb.getInteractingAtoms().getDevicePointer();
} }
} }
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
...@@ -2842,6 +2845,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -2842,6 +2845,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
pairValueDefines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex); pairValueDefines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
pairValueDefines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex); pairValueDefines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
pairValueDefines["CUTOFF"] = cu.doubleToString(nb.getCutoffDistance());
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+pairValueSrc, pairValueDefines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+pairValueSrc, pairValueDefines);
pairValueKernel = cu.getKernel(module, "computeN2Value"); pairValueKernel = cu.getKernel(module, "computeN2Value");
pairValueSrc = ""; pairValueSrc = "";
...@@ -2855,6 +2859,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -2855,6 +2859,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex); pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
pairEnergyDefines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex); pairEnergyDefines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
pairEnergyDefines["CUTOFF"] = cu.doubleToString(nb.getCutoffDistance());
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+pairEnergySrc, pairEnergyDefines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+pairEnergySrc, pairEnergyDefines);
pairEnergyKernel = cu.getKernel(module, "computeN2Energy"); pairEnergyKernel = cu.getKernel(module, "computeN2Energy");
pairEnergySrc = ""; pairEnergySrc = "";
...@@ -2878,6 +2883,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -2878,6 +2883,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
pairValueArgs.push_back(cu.getInvPeriodicBoxSizePointer()); pairValueArgs.push_back(cu.getInvPeriodicBoxSizePointer());
pairValueArgs.push_back(&maxTiles); pairValueArgs.push_back(&maxTiles);
pairValueArgs.push_back(&nb.getBlockCenters().getDevicePointer()); pairValueArgs.push_back(&nb.getBlockCenters().getDevicePointer());
pairValueArgs.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
pairValueArgs.push_back(&nb.getInteractingAtoms().getDevicePointer()); pairValueArgs.push_back(&nb.getInteractingAtoms().getDevicePointer());
} }
else else
...@@ -2918,6 +2924,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -2918,6 +2924,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
pairEnergyArgs.push_back(cu.getInvPeriodicBoxSizePointer()); pairEnergyArgs.push_back(cu.getInvPeriodicBoxSizePointer());
pairEnergyArgs.push_back(&maxTiles); pairEnergyArgs.push_back(&maxTiles);
pairEnergyArgs.push_back(&nb.getBlockCenters().getDevicePointer()); pairEnergyArgs.push_back(&nb.getBlockCenters().getDevicePointer());
pairEnergyArgs.push_back(&nb.getBlockBoundingBoxes().getDevicePointer());
pairEnergyArgs.push_back(&nb.getInteractingAtoms().getDevicePointer()); pairEnergyArgs.push_back(&nb.getInteractingAtoms().getDevicePointer());
} }
else else
...@@ -2984,8 +2991,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -2984,8 +2991,8 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
maxTiles = nb.getInteractingTiles().getSize(); maxTiles = nb.getInteractingTiles().getSize();
pairValueArgs[4] = &nb.getInteractingTiles().getDevicePointer(); pairValueArgs[4] = &nb.getInteractingTiles().getDevicePointer();
pairEnergyArgs[5] = &nb.getInteractingTiles().getDevicePointer(); pairEnergyArgs[5] = &nb.getInteractingTiles().getDevicePointer();
pairValueArgs[10] = &nb.getInteractingAtoms().getDevicePointer(); pairValueArgs[11] = &nb.getInteractingAtoms().getDevicePointer();
pairEnergyArgs[11] = &nb.getInteractingAtoms().getDevicePointer(); pairEnergyArgs[12] = &nb.getInteractingAtoms().getDevicePointer();
} }
} }
cu.executeKernel(pairValueKernel, &pairValueArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cu.executeKernel(pairValueKernel, &pairValueArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
......
...@@ -507,6 +507,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -507,6 +507,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
defines["USE_SYMMETRIC"] = "1"; defines["USE_SYMMETRIC"] = "1";
defines["THREAD_BLOCK_SIZE"] = context.intToString(forceThreadBlockSize); defines["THREAD_BLOCK_SIZE"] = context.intToString(forceThreadBlockSize);
defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff); defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = context.doubleToString(cutoff);
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms()); defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
...@@ -542,6 +543,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source, ...@@ -542,6 +543,7 @@ CUfunction CudaNonbondedUtilities::createInteractionKernel(const string& source,
forceArgs.push_back(context.getInvPeriodicBoxSizePointer()); forceArgs.push_back(context.getInvPeriodicBoxSizePointer());
forceArgs.push_back(&maxTiles); forceArgs.push_back(&maxTiles);
forceArgs.push_back(&blockCenter->getDevicePointer()); forceArgs.push_back(&blockCenter->getDevicePointer());
forceArgs.push_back(&blockBoundingBox->getDevicePointer());
forceArgs.push_back(&interactingAtoms->getDevicePointer()); forceArgs.push_back(&interactingAtoms->getDevicePointer());
} }
for (int i = 0; i < (int) params.size(); i++) for (int i = 0; i < (int) params.size(); i++)
......
...@@ -16,7 +16,8 @@ typedef struct { ...@@ -16,7 +16,8 @@ typedef struct {
extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles, const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -195,7 +196,10 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -195,7 +196,10 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -13,7 +13,8 @@ typedef struct { ...@@ -13,7 +13,8 @@ typedef struct {
extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value, const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -171,7 +172,10 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -171,7 +172,10 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -69,7 +69,8 @@ typedef struct { ...@@ -69,7 +69,8 @@ typedef struct {
*/ */
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params, extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -231,7 +232,10 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -231,7 +232,10 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
...@@ -411,7 +415,8 @@ typedef struct { ...@@ -411,7 +415,8 @@ typedef struct {
extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce, extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce,
real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -583,7 +588,10 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -583,7 +588,10 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -17,7 +17,8 @@ extern "C" __global__ void computeNonbonded( ...@@ -17,7 +17,8 @@ extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions, unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms , const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
...@@ -210,7 +211,10 @@ extern "C" __global__ void computeNonbonded( ...@@ -210,7 +211,10 @@ extern "C" __global__ void computeNonbonded(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -2072,6 +2072,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2072,6 +2072,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if (nb.getUsePeriodic()) if (nb.getUsePeriodic())
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
defines["CUTOFF_SQUARED"] = cl.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance()); defines["CUTOFF_SQUARED"] = cl.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance());
defines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance());
defines["PREFACTOR"] = cl.doubleToString(prefactor); defines["PREFACTOR"] = cl.doubleToString(prefactor);
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
...@@ -2106,6 +2107,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2106,6 +2107,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
index += 2; // The periodic box size arguments are set when the kernel is executed. index += 2; // The periodic box size arguments are set when the kernel is executed.
computeBornSumKernel.setArg<cl_uint>(index++, maxTiles); computeBornSumKernel.setArg<cl_uint>(index++, maxTiles);
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingAtoms().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingAtoms().getDeviceBuffer());
} }
else else
...@@ -2124,6 +2126,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2124,6 +2126,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
index += 2; // The periodic box size arguments are set when the kernel is executed. index += 2; // The periodic box size arguments are set when the kernel is executed.
force1Kernel.setArg<cl_uint>(index++, maxTiles); force1Kernel.setArg<cl_uint>(index++, maxTiles);
force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingAtoms().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingAtoms().getDeviceBuffer());
} }
else else
...@@ -2161,10 +2164,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2161,10 +2164,10 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
maxTiles = nb.getInteractingTiles().getSize(); maxTiles = nb.getInteractingTiles().getSize();
computeBornSumKernel.setArg<cl::Buffer>(3, nb.getInteractingTiles().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(3, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl_uint>(7, maxTiles); computeBornSumKernel.setArg<cl_uint>(7, maxTiles);
computeBornSumKernel.setArg<cl::Buffer>(9, nb.getInteractingAtoms().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(10, nb.getInteractingAtoms().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(5, nb.getInteractingTiles().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(5, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl_uint>(9, maxTiles); force1Kernel.setArg<cl_uint>(9, maxTiles);
force1Kernel.setArg<cl::Buffer>(11, nb.getInteractingAtoms().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(12, nb.getInteractingAtoms().getDeviceBuffer());
} }
} }
cl.executeKernel(computeBornSumKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cl.executeKernel(computeBornSumKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
...@@ -2929,6 +2932,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -2929,6 +2932,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts;
pairValueDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex); pairValueDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex);
pairValueDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex); pairValueDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex);
pairValueDefines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance());
cl::Program program = cl.createProgram(pairValueSrc, pairValueDefines); cl::Program program = cl.createProgram(pairValueSrc, pairValueDefines);
pairValueKernel = cl::Kernel(program, "computeN2Value"); pairValueKernel = cl::Kernel(program, "computeN2Value");
pairValueSrc = ""; pairValueSrc = "";
...@@ -2942,6 +2946,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -2942,6 +2946,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts;
pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex); pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex);
pairEnergyDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex); pairEnergyDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex);
pairEnergyDefines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance());
cl::Program program = cl.createProgram(pairEnergySrc, pairEnergyDefines); cl::Program program = cl.createProgram(pairEnergySrc, pairEnergyDefines);
pairEnergyKernel = cl::Kernel(program, "computeN2Energy"); pairEnergyKernel = cl::Kernel(program, "computeN2Energy");
pairEnergySrc = ""; pairEnergySrc = "";
...@@ -2975,6 +2980,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -2975,6 +2980,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
index += 2; // Periodic box size arguments are set when the kernel is executed. index += 2; // Periodic box size arguments are set when the kernel is executed.
pairValueKernel.setArg<cl_uint>(index++, maxTiles); pairValueKernel.setArg<cl_uint>(index++, maxTiles);
pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingAtoms().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingAtoms().getDeviceBuffer());
} }
else else
...@@ -3023,6 +3029,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3023,6 +3029,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
index += 2; // Periodic box size arguments are set when the kernel is executed. index += 2; // Periodic box size arguments are set when the kernel is executed.
pairEnergyKernel.setArg<cl_uint>(index++, maxTiles); pairEnergyKernel.setArg<cl_uint>(index++, maxTiles);
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingAtoms().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingAtoms().getDeviceBuffer());
} }
else else
...@@ -3114,11 +3121,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3114,11 +3121,11 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if (maxTiles < nb.getInteractingTiles().getSize()) { if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize(); maxTiles = nb.getInteractingTiles().getSize();
pairValueKernel.setArg<cl::Buffer>(6, nb.getInteractingTiles().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(6, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl_uint>(11, maxTiles); pairValueKernel.setArg<cl_uint>(10, maxTiles);
pairValueKernel.setArg<cl::Buffer>(12, nb.getInteractingAtoms().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(13, nb.getInteractingAtoms().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(7, nb.getInteractingTiles().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(7, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl_uint>(12, maxTiles); pairEnergyKernel.setArg<cl_uint>(11, maxTiles);
pairEnergyKernel.setArg<cl::Buffer>(13, nb.getInteractingAtoms().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(14, nb.getInteractingAtoms().getDeviceBuffer());
} }
} }
cl.executeKernel(pairValueKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cl.executeKernel(pairValueKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
......
...@@ -564,6 +564,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -564,6 +564,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines["USE_SYMMETRIC"] = "1"; defines["USE_SYMMETRIC"] = "1";
defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize); defines["FORCE_WORK_GROUP_SIZE"] = context.intToString(forceThreadBlockSize);
defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff); defines["CUTOFF_SQUARED"] = context.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = context.doubleToString(cutoff);
defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms()); defines["NUM_ATOMS"] = context.intToString(context.getNumAtoms());
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks()); defines["NUM_BLOCKS"] = context.intToString(context.getNumAtomBlocks());
...@@ -604,6 +605,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc ...@@ -604,6 +605,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
index += 2; // The periodic box size arguments are set when the kernel is executed. index += 2; // The periodic box size arguments are set when the kernel is executed.
kernel.setArg<cl_uint>(index++, interactingTiles->getSize()); kernel.setArg<cl_uint>(index++, interactingTiles->getSize());
kernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer());
kernel.setArg<cl::Buffer>(index++, interactingAtoms->getDeviceBuffer()); kernel.setArg<cl::Buffer>(index++, interactingAtoms->getDeviceBuffer());
} }
for (int i = 0; i < (int) params.size(); i++) { for (int i = 0; i < (int) params.size(); i++) {
......
...@@ -20,7 +20,8 @@ __kernel void computeN2Energy( ...@@ -20,7 +20,8 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles, __global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -205,7 +206,10 @@ __kernel void computeN2Energy( ...@@ -205,7 +206,10 @@ __kernel void computeN2Energy(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -20,7 +20,8 @@ __kernel void computeN2Energy( ...@@ -20,7 +20,8 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles, __global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -221,7 +222,10 @@ __kernel void computeN2Energy( ...@@ -221,7 +222,10 @@ __kernel void computeN2Energy(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -14,7 +14,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -14,7 +14,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif #endif
__local real* restrict local_value, __local real* restrict local_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -179,7 +180,10 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -179,7 +180,10 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -14,7 +14,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -14,7 +14,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif #endif
__local real* restrict local_value, __local real* restrict local_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -189,7 +190,10 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -189,7 +190,10 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -21,7 +21,8 @@ __kernel void computeBornSum( ...@@ -21,7 +21,8 @@ __kernel void computeBornSum(
#endif #endif
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -191,7 +192,10 @@ __kernel void computeBornSum( ...@@ -191,7 +192,10 @@ __kernel void computeBornSum(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
...@@ -387,7 +391,8 @@ __kernel void computeGBSAForce1( ...@@ -387,7 +391,8 @@ __kernel void computeGBSAForce1(
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -565,7 +570,10 @@ __kernel void computeGBSAForce1( ...@@ -565,7 +570,10 @@ __kernel void computeGBSAForce1(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -20,7 +20,8 @@ __kernel void computeBornSum( ...@@ -20,7 +20,8 @@ __kernel void computeBornSum(
#endif #endif
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -197,7 +198,10 @@ __kernel void computeBornSum( ...@@ -197,7 +198,10 @@ __kernel void computeBornSum(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
...@@ -408,7 +412,8 @@ __kernel void computeGBSAForce1( ...@@ -408,7 +412,8 @@ __kernel void computeGBSAForce1(
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -600,7 +605,10 @@ __kernel void computeGBSAForce1( ...@@ -600,7 +605,10 @@ __kernel void computeGBSAForce1(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -25,7 +25,8 @@ __kernel void computeNonbonded( ...@@ -25,7 +25,8 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms , __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = get_global_size(0)/TILE_SIZE; const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
...@@ -218,7 +219,10 @@ __kernel void computeNonbonded( ...@@ -218,7 +219,10 @@ __kernel void computeNonbonded(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
...@@ -22,7 +22,8 @@ __kernel void computeNonbonded( ...@@ -22,7 +22,8 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const int* restrict interactingAtoms , __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
real energy = 0; real energy = 0;
...@@ -235,7 +236,10 @@ __kernel void computeNonbonded( ...@@ -235,7 +236,10 @@ __kernel void computeNonbonded(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; ushort2 tileIndices = tiles[pos];
x = tileIndices.x; x = tileIndices.x;
singlePeriodicCopy = tileIndices.y; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
} }
else else
#endif #endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment