Commit 2ff0b0ae authored by peastman's avatar peastman
Browse files

Converted the array containing atom block indices for the neighbor list from...

Converted the array containing atom block indices for the neighbor list from ushort2 to int.  This removes the hard limit of 2 million atoms.
parent 7ae8e13a
...@@ -254,7 +254,7 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -254,7 +254,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
maxTiles = numTiles; maxTiles = numTiles;
if (maxTiles < 1) if (maxTiles < 1)
maxTiles = 1; maxTiles = 1;
interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = CudaArray::create<int>(context, maxTiles, "interactingTiles");
interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms");
interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount"); interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount");
int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
...@@ -384,7 +384,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -384,7 +384,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
delete interactingAtoms; delete interactingAtoms;
interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails
interactingAtoms = NULL; interactingAtoms = NULL;
interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = CudaArray::create<int>(context, maxTiles, "interactingTiles");
interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms");
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles->getDevicePointer(); forceArgs[7] = &interactingTiles->getDevicePointer();
......
...@@ -16,7 +16,7 @@ typedef struct { ...@@ -16,7 +16,7 @@ typedef struct {
extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles, const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -194,8 +194,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -194,8 +194,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -13,7 +13,7 @@ typedef struct { ...@@ -13,7 +13,7 @@ typedef struct {
extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value, const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -170,8 +170,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -170,8 +170,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -122,8 +122,8 @@ __device__ void prefixSum(short* sum, ushort2* temp) { ...@@ -122,8 +122,8 @@ __device__ void prefixSum(short* sum, ushort2* temp) {
* This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions * This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions
* in them, and writes the result to global memory. * in them, and writes the result to global memory.
*/ */
__device__ void storeInteractionData(unsigned short x, unsigned short* buffer, short* sum, ushort2* temp, int* atoms, int& numAtoms, __device__ void storeInteractionData(int x, unsigned short* buffer, short* sum, ushort2* temp, int* atoms, int& numAtoms,
int& baseIndex, unsigned int* interactionCount, ushort2* interactingTiles, unsigned int* interactingAtoms, real4 periodicBoxSize, int& baseIndex, unsigned int* interactionCount, int* interactingTiles, unsigned int* interactingAtoms, real4 periodicBoxSize,
real4 invPeriodicBoxSize, const real4* posq, real3* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) { real4 invPeriodicBoxSize, const real4* posq, real3* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF && const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
...@@ -223,7 +223,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s ...@@ -223,7 +223,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
numAtoms = atomsToStore-tilesToStore*TILE_SIZE; numAtoms = atomsToStore-tilesToStore*TILE_SIZE;
if (baseIndex+tilesToStore <= maxTiles) { if (baseIndex+tilesToStore <= maxTiles) {
if (threadIdx.x < tilesToStore) if (threadIdx.x < tilesToStore)
interactingTiles[baseIndex+threadIdx.x] = make_ushort2(x, singlePeriodicCopy); interactingTiles[baseIndex+threadIdx.x] = x;
for (int i = threadIdx.x; i < tilesToStore*TILE_SIZE; i += blockDim.x) for (int i = threadIdx.x; i < tilesToStore*TILE_SIZE; i += blockDim.x)
interactingAtoms[baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS); interactingAtoms[baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS);
} }
...@@ -247,7 +247,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s ...@@ -247,7 +247,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
__syncthreads(); __syncthreads();
if (baseIndex < maxTiles) { if (baseIndex < maxTiles) {
if (threadIdx.x == 0) if (threadIdx.x == 0)
interactingTiles[baseIndex] = make_ushort2(x, singlePeriodicCopy); interactingTiles[baseIndex] = x;
if (threadIdx.x < TILE_SIZE) if (threadIdx.x < TILE_SIZE)
interactingAtoms[baseIndex*TILE_SIZE+threadIdx.x] = (threadIdx.x < numAtoms ? atoms[threadIdx.x] : NUM_ATOMS); interactingAtoms[baseIndex*TILE_SIZE+threadIdx.x] = (threadIdx.x < numAtoms ? atoms[threadIdx.x] : NUM_ATOMS);
} }
...@@ -290,7 +290,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s ...@@ -290,7 +290,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
* [in] blockCenter - the center of each bounding box * [in] blockCenter - the center of each bounding box
* [in] blockBoundingBox - bounding box of each atom block * [in] blockBoundingBox - bounding box of each atom block
* [out] interactionCount - total number of tiles that have interactions * [out] interactionCount - total number of tiles that have interactions
* [out] interactingTiles - set of tiles that have interactions * [out] interactingTiles - set of blocks that have interactions
* [out] interactingAtoms - a list of atoms that interact with each atom block * [out] interactingAtoms - a list of atoms that interact with each atom block
* [in] posq - x,y,z coordinates of each atom and charge q * [in] posq - x,y,z coordinates of each atom and charge q
* [in] maxTiles - maximum number of tiles to process, used for multi-GPUs * [in] maxTiles - maximum number of tiles to process, used for multi-GPUs
...@@ -313,7 +313,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s ...@@ -313,7 +313,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
* *
*/ */
extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int* __restrict__ interactionCount, extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int* __restrict__ interactionCount,
ushort2* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startBlockIndex, int* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startBlockIndex,
unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter, const real4* __restrict__ sortedBlockBoundingBox, unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter, const real4* __restrict__ sortedBlockBoundingBox,
const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, real4* __restrict__ oldPositions, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, real4* __restrict__ oldPositions,
const int* __restrict__ rebuildNeighborList) { const int* __restrict__ rebuildNeighborList) {
...@@ -343,7 +343,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -343,7 +343,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
if (threadIdx.x == blockDim.x-1) if (threadIdx.x == blockDim.x-1)
numAtoms = 0; numAtoms = 0;
real2 sortedKey = sortedBlocks[i]; real2 sortedKey = sortedBlocks[i];
unsigned short x = (unsigned short) sortedKey.y; int x = (int) sortedKey.y;
real4 blockCenterX = sortedBlockCenter[i]; real4 blockCenterX = sortedBlockCenter[i];
real4 blockSizeX = sortedBlockBoundingBox[i]; real4 blockSizeX = sortedBlockBoundingBox[i];
......
...@@ -69,7 +69,7 @@ typedef struct { ...@@ -69,7 +69,7 @@ typedef struct {
*/ */
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params, extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms, unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -230,8 +230,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -230,8 +230,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
...@@ -415,7 +414,7 @@ typedef struct { ...@@ -415,7 +414,7 @@ typedef struct {
extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce, extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce,
real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms, unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -586,8 +585,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -586,8 +585,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -84,8 +84,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) { ...@@ -84,8 +84,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
* [in]exclusionTiles - x,y denotes the indices of tiles that have an exclusion * [in]exclusionTiles - x,y denotes the indices of tiles that have an exclusion
* [in]startTileIndex - index into first tile to be processed * [in]startTileIndex - index into first tile to be processed
* [in]numTileIndices - number of tiles this context is responsible for processing * [in]numTileIndices - number of tiles this context is responsible for processing
* [in]ushort2 tiles - x component lists the tiles that interact with each tile * [in]int tiles - the atom block for each tile
* - y component not used currently
* [in]interactionCount - total number of tiles that have an interaction * [in]interactionCount - total number of tiles that have an interaction
* [in]maxTiles - stores the size of the neighbourlist in case it needs * [in]maxTiles - stores the size of the neighbourlist in case it needs
* - to be expanded * - to be expanded
...@@ -104,7 +103,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -104,7 +103,7 @@ extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions, unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
...@@ -338,8 +337,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -338,8 +337,7 @@ extern "C" __global__ void computeNonbonded(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -272,7 +272,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -272,7 +272,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if (maxTiles < 1) if (maxTiles < 1)
maxTiles = 1; maxTiles = 1;
int numAtoms = context.getNumAtoms(); int numAtoms = context.getNumAtoms();
interactingTiles = OpenCLArray::create<mm_ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
interactionCount = OpenCLArray::create<cl_uint>(context, 1, "interactionCount"); interactionCount = OpenCLArray::create<cl_uint>(context, 1, "interactionCount");
int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float)); int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
...@@ -423,7 +423,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() { ...@@ -423,7 +423,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
delete interactingAtoms; delete interactingAtoms;
interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails
interactingAtoms = NULL; interactingAtoms = NULL;
interactingTiles = OpenCLArray::create<mm_ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(11, maxTiles); forceKernel.setArg<cl_uint>(11, maxTiles);
......
...@@ -20,7 +20,7 @@ __kernel void computeN2Energy( ...@@ -20,7 +20,7 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles, __global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -204,8 +204,7 @@ __kernel void computeN2Energy( ...@@ -204,8 +204,7 @@ __kernel void computeN2Energy(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -20,7 +20,7 @@ __kernel void computeN2Energy( ...@@ -20,7 +20,7 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles, __global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -220,8 +220,7 @@ __kernel void computeN2Energy( ...@@ -220,8 +220,7 @@ __kernel void computeN2Energy(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif #endif
__local real* restrict local_value, __local real* restrict local_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -178,8 +178,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -178,8 +178,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif #endif
__local real* restrict local_value, __local real* restrict local_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -188,8 +188,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -188,8 +188,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -98,8 +98,8 @@ void prefixSum(__local short* sum, __local ushort2* temp) { ...@@ -98,8 +98,8 @@ void prefixSum(__local short* sum, __local ushort2* temp) {
* This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions * This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions
* in them, and writes the result to global memory. * in them, and writes the result to global memory.
*/ */
void storeInteractionData(unsigned short x, __local unsigned short* buffer, __local short* sum, __local ushort2* temp, __local int* atoms, __local int* numAtoms, void storeInteractionData(int x, __local unsigned short* buffer, __local short* sum, __local ushort2* temp, __local int* atoms, __local int* numAtoms,
__local int* baseIndex, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize, __local int* baseIndex, __global unsigned int* interactionCount, __global int* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize,
real4 invPeriodicBoxSize, __global const real4* posq, __local real4* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) { real4 invPeriodicBoxSize, __global const real4* posq, __local real4* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF && const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
...@@ -192,7 +192,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo ...@@ -192,7 +192,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
*numAtoms = atomsToStore-tilesToStore*TILE_SIZE; *numAtoms = atomsToStore-tilesToStore*TILE_SIZE;
if (*baseIndex+tilesToStore <= maxTiles) { if (*baseIndex+tilesToStore <= maxTiles) {
if (get_local_id(0) < tilesToStore) if (get_local_id(0) < tilesToStore)
interactingTiles[*baseIndex+get_local_id(0)] = (ushort2) (x, singlePeriodicCopy); interactingTiles[*baseIndex+get_local_id(0)] = x;
for (int i = get_local_id(0); i < tilesToStore*TILE_SIZE; i += get_local_size(0)) for (int i = get_local_id(0); i < tilesToStore*TILE_SIZE; i += get_local_size(0))
interactingAtoms[*baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS); interactingAtoms[*baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS);
} }
...@@ -216,7 +216,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo ...@@ -216,7 +216,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (*baseIndex < maxTiles) { if (*baseIndex < maxTiles) {
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
interactingTiles[*baseIndex] = (ushort2) (x, singlePeriodicCopy); interactingTiles[*baseIndex] = x;
if (get_local_id(0) < TILE_SIZE) if (get_local_id(0) < TILE_SIZE)
interactingAtoms[*baseIndex*TILE_SIZE+get_local_id(0)] = (get_local_id(0) < *numAtoms ? atoms[get_local_id(0)] : NUM_ATOMS); interactingAtoms[*baseIndex*TILE_SIZE+get_local_id(0)] = (get_local_id(0) < *numAtoms ? atoms[get_local_id(0)] : NUM_ATOMS);
} }
...@@ -234,7 +234,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo ...@@ -234,7 +234,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
* mark them as non-interacting. * mark them as non-interacting.
*/ */
__kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global unsigned int* restrict interactionCount, __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global unsigned int* restrict interactionCount,
__global ushort2* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex, __global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex,
unsigned int numBlocks, __global real2* restrict sortedBlocks, __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox, unsigned int numBlocks, __global real2* restrict sortedBlocks, __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox,
__global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions, __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions,
__global const int* restrict rebuildNeighborList) { __global const int* restrict rebuildNeighborList) {
...@@ -272,7 +272,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -272,7 +272,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
if (get_local_id(0) == get_local_size(0)-1) if (get_local_id(0) == get_local_size(0)-1)
numAtoms = 0; numAtoms = 0;
real2 sortedKey = sortedBlocks[i]; real2 sortedKey = sortedBlocks[i];
unsigned short x = (unsigned short) sortedKey.y; int x = (int) sortedKey.y;
real4 blockCenterX = sortedBlockCenter[i]; real4 blockCenterX = sortedBlockCenter[i];
real4 blockSizeX = sortedBlockBoundingBox[i]; real4 blockSizeX = sortedBlockBoundingBox[i];
......
...@@ -21,7 +21,7 @@ __kernel void computeBornSum( ...@@ -21,7 +21,7 @@ __kernel void computeBornSum(
#endif #endif
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -190,8 +190,7 @@ __kernel void computeBornSum( ...@@ -190,8 +190,7 @@ __kernel void computeBornSum(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
...@@ -391,7 +390,7 @@ __kernel void computeGBSAForce1( ...@@ -391,7 +390,7 @@ __kernel void computeGBSAForce1(
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -568,8 +567,7 @@ __kernel void computeGBSAForce1( ...@@ -568,8 +567,7 @@ __kernel void computeGBSAForce1(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -20,7 +20,7 @@ __kernel void computeBornSum( ...@@ -20,7 +20,7 @@ __kernel void computeBornSum(
#endif #endif
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -196,8 +196,7 @@ __kernel void computeBornSum( ...@@ -196,8 +196,7 @@ __kernel void computeBornSum(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
...@@ -412,7 +411,7 @@ __kernel void computeGBSAForce1( ...@@ -412,7 +411,7 @@ __kernel void computeGBSAForce1(
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -603,8 +602,7 @@ __kernel void computeGBSAForce1( ...@@ -603,8 +602,7 @@ __kernel void computeGBSAForce1(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -25,7 +25,7 @@ __kernel void computeNonbonded( ...@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
...@@ -217,8 +217,7 @@ __kernel void computeNonbonded( ...@@ -217,8 +217,7 @@ __kernel void computeNonbonded(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -22,7 +22,7 @@ __kernel void computeNonbonded( ...@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
...@@ -234,8 +234,7 @@ __kernel void computeNonbonded( ...@@ -234,8 +234,7 @@ __kernel void computeNonbonded(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -61,7 +61,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -61,7 +61,7 @@ extern "C" __global__ void computeElectrostatics(
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#endif #endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
...@@ -230,10 +230,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -230,10 +230,8 @@ extern "C" __global__ void computeElectrostatics(
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles)
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
}
else else
#endif #endif
{ {
......
...@@ -400,7 +400,7 @@ extern "C" __global__ void computeFixedField( ...@@ -400,7 +400,7 @@ extern "C" __global__ void computeFixedField(
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const ushort2* __restrict__ exclusionTiles, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const ushort2* __restrict__ exclusionTiles,
unsigned int startTileIndex, unsigned int numTileIndices, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK #elif defined USE_GK
const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers, const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers,
#endif #endif
...@@ -569,10 +569,8 @@ extern "C" __global__ void computeFixedField( ...@@ -569,10 +569,8 @@ extern "C" __global__ void computeFixedField(
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles)
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
}
else else
#endif #endif
{ {
......
...@@ -201,7 +201,7 @@ extern "C" __global__ void computeInducedField( ...@@ -201,7 +201,7 @@ extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK #elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS, unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii, const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
...@@ -339,10 +339,8 @@ extern "C" __global__ void computeInducedField( ...@@ -339,10 +339,8 @@ extern "C" __global__ void computeInducedField(
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles)
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
}
else else
#endif #endif
{ {
......
...@@ -184,7 +184,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -184,7 +184,7 @@ extern "C" __global__ void computeElectrostatics(
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#endif #endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
...@@ -312,10 +312,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -312,10 +312,8 @@ extern "C" __global__ void computeElectrostatics(
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles)
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
}
else else
#endif #endif
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment