Commit 2ff0b0ae authored by peastman
Browse files

Converted the array containing atom block indices for the neighbor list from ushort2 to int

Converted the array containing atom block indices for the neighbor list from ushort2 to int.  This removes the hard limit of 2 million atoms.
parent 7ae8e13a
......@@ -254,7 +254,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
maxTiles = numTiles;
if (maxTiles < 1)
maxTiles = 1;
interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles");
interactingTiles = CudaArray::create<int>(context, maxTiles, "interactingTiles");
interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms");
interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount");
int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
......@@ -384,7 +384,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
delete interactingAtoms;
interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails
interactingAtoms = NULL;
interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles");
interactingTiles = CudaArray::create<int>(context, maxTiles, "interactingTiles");
interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms");
if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles->getDevicePointer();
......
......@@ -16,7 +16,7 @@ typedef struct {
extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else
unsigned int numTiles
......@@ -194,8 +194,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -13,7 +13,7 @@ typedef struct {
extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else
unsigned int numTiles
......@@ -170,8 +170,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -122,8 +122,8 @@ __device__ void prefixSum(short* sum, ushort2* temp) {
* This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions
* in them, and writes the result to global memory.
*/
__device__ void storeInteractionData(unsigned short x, unsigned short* buffer, short* sum, ushort2* temp, int* atoms, int& numAtoms,
int& baseIndex, unsigned int* interactionCount, ushort2* interactingTiles, unsigned int* interactingAtoms, real4 periodicBoxSize,
__device__ void storeInteractionData(int x, unsigned short* buffer, short* sum, ushort2* temp, int* atoms, int& numAtoms,
int& baseIndex, unsigned int* interactionCount, int* interactingTiles, unsigned int* interactingAtoms, real4 periodicBoxSize,
real4 invPeriodicBoxSize, const real4* posq, real3* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
......@@ -223,7 +223,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
numAtoms = atomsToStore-tilesToStore*TILE_SIZE;
if (baseIndex+tilesToStore <= maxTiles) {
if (threadIdx.x < tilesToStore)
interactingTiles[baseIndex+threadIdx.x] = make_ushort2(x, singlePeriodicCopy);
interactingTiles[baseIndex+threadIdx.x] = x;
for (int i = threadIdx.x; i < tilesToStore*TILE_SIZE; i += blockDim.x)
interactingAtoms[baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS);
}
......@@ -247,7 +247,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
__syncthreads();
if (baseIndex < maxTiles) {
if (threadIdx.x == 0)
interactingTiles[baseIndex] = make_ushort2(x, singlePeriodicCopy);
interactingTiles[baseIndex] = x;
if (threadIdx.x < TILE_SIZE)
interactingAtoms[baseIndex*TILE_SIZE+threadIdx.x] = (threadIdx.x < numAtoms ? atoms[threadIdx.x] : NUM_ATOMS);
}
......@@ -290,7 +290,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
* [in] blockCenter - the center of each bounding box
* [in] blockBoundingBox - bounding box of each atom block
* [out] interactionCount - total number of tiles that have interactions
* [out] interactingTiles - set of tiles that have interactions
* [out] interactingTiles - set of blocks that have interactions
* [out] interactingAtoms - a list of atoms that interact with each atom block
* [in] posq - x,y,z coordinates of each atom and charge q
* [in] maxTiles - maximum number of tiles to process, used for multi-GPUs
......@@ -313,7 +313,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
*
*/
extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int* __restrict__ interactionCount,
ushort2* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startBlockIndex,
int* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startBlockIndex,
unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter, const real4* __restrict__ sortedBlockBoundingBox,
const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, real4* __restrict__ oldPositions,
const int* __restrict__ rebuildNeighborList) {
......@@ -343,7 +343,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
if (threadIdx.x == blockDim.x-1)
numAtoms = 0;
real2 sortedKey = sortedBlocks[i];
unsigned short x = (unsigned short) sortedKey.y;
int x = (int) sortedKey.y;
real4 blockCenterX = sortedBlockCenter[i];
real4 blockSizeX = sortedBlockBoundingBox[i];
......
......@@ -69,7 +69,7 @@ typedef struct {
*/
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else
unsigned int numTiles,
......@@ -230,8 +230,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......@@ -415,7 +414,7 @@ typedef struct {
extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce,
real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else
unsigned int numTiles,
......@@ -586,8 +585,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -84,8 +84,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
* [in]exclusionTiles - x,y denotes the indices of tiles that have an exclusion
* [in]startTileIndex - index into first tile to be processed
* [in]numTileIndices - number of tiles this context is responsible for processing
* [in]ushort2 tiles - x component lists the tiles that interact with each tile
* - y component not used currently
* [in]int tiles - the atom block for each tile
* [in]interactionCount - total number of tiles that have an interaction
* [in]maxTiles - stores the size of the neighbourlist in case it needs
* - to be expanded
......@@ -104,7 +103,7 @@ extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#endif
PARAMETER_ARGUMENTS) {
......@@ -338,8 +337,7 @@ extern "C" __global__ void computeNonbonded(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -272,7 +272,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if (maxTiles < 1)
maxTiles = 1;
int numAtoms = context.getNumAtoms();
interactingTiles = OpenCLArray::create<mm_ushort2>(context, maxTiles, "interactingTiles");
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
interactionCount = OpenCLArray::create<cl_uint>(context, 1, "interactionCount");
int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
......@@ -423,7 +423,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
delete interactingAtoms;
interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails
interactingAtoms = NULL;
interactingTiles = OpenCLArray::create<mm_ushort2>(context, maxTiles, "interactingTiles");
interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(11, maxTiles);
......
......@@ -20,7 +20,7 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else
unsigned int numTiles
......@@ -204,8 +204,7 @@ __kernel void computeN2Energy(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -20,7 +20,7 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else
unsigned int numTiles
......@@ -220,8 +220,7 @@ __kernel void computeN2Energy(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif
__local real* restrict local_value,
#ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else
unsigned int numTiles
......@@ -178,8 +178,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif
__local real* restrict local_value,
#ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else
unsigned int numTiles
......@@ -188,8 +188,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -98,8 +98,8 @@ void prefixSum(__local short* sum, __local ushort2* temp) {
* This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions
* in them, and writes the result to global memory.
*/
void storeInteractionData(unsigned short x, __local unsigned short* buffer, __local short* sum, __local ushort2* temp, __local int* atoms, __local int* numAtoms,
__local int* baseIndex, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize,
void storeInteractionData(int x, __local unsigned short* buffer, __local short* sum, __local ushort2* temp, __local int* atoms, __local int* numAtoms,
__local int* baseIndex, __global unsigned int* interactionCount, __global int* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize,
real4 invPeriodicBoxSize, __global const real4* posq, __local real4* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
......@@ -192,7 +192,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
*numAtoms = atomsToStore-tilesToStore*TILE_SIZE;
if (*baseIndex+tilesToStore <= maxTiles) {
if (get_local_id(0) < tilesToStore)
interactingTiles[*baseIndex+get_local_id(0)] = (ushort2) (x, singlePeriodicCopy);
interactingTiles[*baseIndex+get_local_id(0)] = x;
for (int i = get_local_id(0); i < tilesToStore*TILE_SIZE; i += get_local_size(0))
interactingAtoms[*baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS);
}
......@@ -216,7 +216,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
barrier(CLK_LOCAL_MEM_FENCE);
if (*baseIndex < maxTiles) {
if (get_local_id(0) == 0)
interactingTiles[*baseIndex] = (ushort2) (x, singlePeriodicCopy);
interactingTiles[*baseIndex] = x;
if (get_local_id(0) < TILE_SIZE)
interactingAtoms[*baseIndex*TILE_SIZE+get_local_id(0)] = (get_local_id(0) < *numAtoms ? atoms[get_local_id(0)] : NUM_ATOMS);
}
......@@ -234,7 +234,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
* mark them as non-interacting.
*/
__kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global unsigned int* restrict interactionCount,
__global ushort2* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex,
__global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex,
unsigned int numBlocks, __global real2* restrict sortedBlocks, __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox,
__global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions,
__global const int* restrict rebuildNeighborList) {
......@@ -272,7 +272,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
if (get_local_id(0) == get_local_size(0)-1)
numAtoms = 0;
real2 sortedKey = sortedBlocks[i];
unsigned short x = (unsigned short) sortedKey.y;
int x = (int) sortedKey.y;
real4 blockCenterX = sortedBlockCenter[i];
real4 blockSizeX = sortedBlockBoundingBox[i];
......
......@@ -21,7 +21,7 @@ __kernel void computeBornSum(
#endif
__global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else
unsigned int numTiles,
......@@ -190,8 +190,7 @@ __kernel void computeBornSum(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......@@ -391,7 +390,7 @@ __kernel void computeGBSAForce1(
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else
unsigned int numTiles,
......@@ -568,8 +567,7 @@ __kernel void computeGBSAForce1(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -20,7 +20,7 @@ __kernel void computeBornSum(
#endif
__global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else
unsigned int numTiles,
......@@ -196,8 +196,7 @@ __kernel void computeBornSum(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......@@ -412,7 +411,7 @@ __kernel void computeGBSAForce1(
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else
unsigned int numTiles,
......@@ -603,8 +602,7 @@ __kernel void computeGBSAForce1(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif
PARAMETER_ARGUMENTS) {
......@@ -217,8 +217,7 @@ __kernel void computeNonbonded(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF
, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif
PARAMETER_ARGUMENTS) {
......@@ -234,8 +234,7 @@ __kernel void computeNonbonded(
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
......@@ -61,7 +61,7 @@ extern "C" __global__ void computeElectrostatics(
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
......@@ -230,10 +230,8 @@ extern "C" __global__ void computeElectrostatics(
unsigned int x, y;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
}
if (numTiles <= maxTiles)
x = tiles[pos];
else
#endif
{
......
......@@ -400,7 +400,7 @@ extern "C" __global__ void computeFixedField(
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const ushort2* __restrict__ exclusionTiles,
unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK
const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers,
#endif
......@@ -569,10 +569,8 @@ extern "C" __global__ void computeFixedField(
unsigned int x, y;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
}
if (numTiles <= maxTiles)
x = tiles[pos];
else
#endif
{
......
......@@ -201,7 +201,7 @@ extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
......@@ -339,10 +339,8 @@ extern "C" __global__ void computeInducedField(
unsigned int x, y;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
}
if (numTiles <= maxTiles)
x = tiles[pos];
else
#endif
{
......
......@@ -184,7 +184,7 @@ extern "C" __global__ void computeElectrostatics(
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
......@@ -312,10 +312,8 @@ extern "C" __global__ void computeElectrostatics(
unsigned int x, y;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
}
if (numTiles <= maxTiles)
x = tiles[pos];
else
#endif
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment