Commit ca489698 authored by Lee-Ping
Browse files

Merge branch 'master' of https://github.com/SimTk/openmm

parents c023f4ca 9d03dbb4
...@@ -254,7 +254,7 @@ void CudaNonbondedUtilities::initialize(const System& system) { ...@@ -254,7 +254,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
maxTiles = numTiles; maxTiles = numTiles;
if (maxTiles < 1) if (maxTiles < 1)
maxTiles = 1; maxTiles = 1;
interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = CudaArray::create<int>(context, maxTiles, "interactingTiles");
interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms");
interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount"); interactionCount = CudaArray::create<unsigned int>(context, 1, "interactionCount");
int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (context.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
...@@ -384,7 +384,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() { ...@@ -384,7 +384,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
delete interactingAtoms; delete interactingAtoms;
interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails
interactingAtoms = NULL; interactingAtoms = NULL;
interactingTiles = CudaArray::create<ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = CudaArray::create<int>(context, maxTiles, "interactingTiles");
interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = CudaArray::create<int>(context, CudaContext::TileSize*maxTiles, "interactingAtoms");
if (forceArgs.size() > 0) if (forceArgs.size() > 0)
forceArgs[7] = &interactingTiles->getDevicePointer(); forceArgs[7] = &interactingTiles->getDevicePointer();
......
...@@ -16,7 +16,7 @@ typedef struct { ...@@ -16,7 +16,7 @@ typedef struct {
extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles, const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, const ushort2* __restrict__ exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -194,8 +194,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc ...@@ -194,8 +194,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -13,7 +13,7 @@ typedef struct { ...@@ -13,7 +13,7 @@ typedef struct {
extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions, extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const unsigned int* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value, const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ global_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -170,8 +170,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const ...@@ -170,8 +170,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -122,8 +122,8 @@ __device__ void prefixSum(short* sum, ushort2* temp) { ...@@ -122,8 +122,8 @@ __device__ void prefixSum(short* sum, ushort2* temp) {
* This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions * This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions
* in them, and writes the result to global memory. * in them, and writes the result to global memory.
*/ */
__device__ void storeInteractionData(unsigned short x, unsigned short* buffer, short* sum, ushort2* temp, int* atoms, int& numAtoms, __device__ void storeInteractionData(int x, unsigned short* buffer, short* sum, ushort2* temp, int* atoms, int& numAtoms,
int& baseIndex, unsigned int* interactionCount, ushort2* interactingTiles, unsigned int* interactingAtoms, real4 periodicBoxSize, int& baseIndex, unsigned int* interactionCount, int* interactingTiles, unsigned int* interactingAtoms, real4 periodicBoxSize,
real4 invPeriodicBoxSize, const real4* posq, real3* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) { real4 invPeriodicBoxSize, const real4* posq, real3* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF && const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
...@@ -223,7 +223,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s ...@@ -223,7 +223,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
numAtoms = atomsToStore-tilesToStore*TILE_SIZE; numAtoms = atomsToStore-tilesToStore*TILE_SIZE;
if (baseIndex+tilesToStore <= maxTiles) { if (baseIndex+tilesToStore <= maxTiles) {
if (threadIdx.x < tilesToStore) if (threadIdx.x < tilesToStore)
interactingTiles[baseIndex+threadIdx.x] = make_ushort2(x, singlePeriodicCopy); interactingTiles[baseIndex+threadIdx.x] = x;
for (int i = threadIdx.x; i < tilesToStore*TILE_SIZE; i += blockDim.x) for (int i = threadIdx.x; i < tilesToStore*TILE_SIZE; i += blockDim.x)
interactingAtoms[baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS); interactingAtoms[baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS);
} }
...@@ -247,7 +247,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s ...@@ -247,7 +247,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
__syncthreads(); __syncthreads();
if (baseIndex < maxTiles) { if (baseIndex < maxTiles) {
if (threadIdx.x == 0) if (threadIdx.x == 0)
interactingTiles[baseIndex] = make_ushort2(x, singlePeriodicCopy); interactingTiles[baseIndex] = x;
if (threadIdx.x < TILE_SIZE) if (threadIdx.x < TILE_SIZE)
interactingAtoms[baseIndex*TILE_SIZE+threadIdx.x] = (threadIdx.x < numAtoms ? atoms[threadIdx.x] : NUM_ATOMS); interactingAtoms[baseIndex*TILE_SIZE+threadIdx.x] = (threadIdx.x < numAtoms ? atoms[threadIdx.x] : NUM_ATOMS);
} }
...@@ -290,7 +290,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s ...@@ -290,7 +290,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
* [in] blockCenter - the center of each bounding box * [in] blockCenter - the center of each bounding box
* [in] blockBoundingBox - bounding box of each atom block * [in] blockBoundingBox - bounding box of each atom block
* [out] interactionCount - total number of tiles that have interactions * [out] interactionCount - total number of tiles that have interactions
* [out] interactingTiles - set of tiles that have interactions * [out] interactingTiles - set of blocks that have interactions
* [out] interactingAtoms - a list of atoms that interact with each atom block * [out] interactingAtoms - a list of atoms that interact with each atom block
* [in] posq - x,y,z coordinates of each atom and charge q * [in] posq - x,y,z coordinates of each atom and charge q
* [in] maxTiles - maximum number of tiles to process, used for multi-GPUs * [in] maxTiles - maximum number of tiles to process, used for multi-GPUs
...@@ -313,7 +313,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s ...@@ -313,7 +313,7 @@ __device__ void storeInteractionData(unsigned short x, unsigned short* buffer, s
* *
*/ */
extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int* __restrict__ interactionCount, extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int* __restrict__ interactionCount,
ushort2* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startBlockIndex, int* __restrict__ interactingTiles, unsigned int* __restrict__ interactingAtoms, const real4* __restrict__ posq, unsigned int maxTiles, unsigned int startBlockIndex,
unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter, const real4* __restrict__ sortedBlockBoundingBox, unsigned int numBlocks, real2* __restrict__ sortedBlocks, const real4* __restrict__ sortedBlockCenter, const real4* __restrict__ sortedBlockBoundingBox,
const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, real4* __restrict__ oldPositions, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, real4* __restrict__ oldPositions,
const int* __restrict__ rebuildNeighborList) { const int* __restrict__ rebuildNeighborList) {
...@@ -343,7 +343,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea ...@@ -343,7 +343,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
if (threadIdx.x == blockDim.x-1) if (threadIdx.x == blockDim.x-1)
numAtoms = 0; numAtoms = 0;
real2 sortedKey = sortedBlocks[i]; real2 sortedKey = sortedBlocks[i];
unsigned short x = (unsigned short) sortedKey.y; int x = (int) sortedKey.y;
real4 blockCenterX = sortedBlockCenter[i]; real4 blockCenterX = sortedBlockCenter[i];
real4 blockSizeX = sortedBlockBoundingBox[i]; real4 blockSizeX = sortedBlockBoundingBox[i];
......
...@@ -69,7 +69,7 @@ typedef struct { ...@@ -69,7 +69,7 @@ typedef struct {
*/ */
extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params, extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ global_bornSum, const real4* __restrict__ posq, const float2* __restrict__ global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms, unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -230,8 +230,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa ...@@ -230,8 +230,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
...@@ -415,7 +414,7 @@ typedef struct { ...@@ -415,7 +414,7 @@ typedef struct {
extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce, extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ global_bornForce,
real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const real* __restrict__ global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms, unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -586,8 +585,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo ...@@ -586,8 +585,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -84,8 +84,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) { ...@@ -84,8 +84,7 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
* [in]exclusionTiles - x,y denotes the indices of tiles that have an exclusion * [in]exclusionTiles - x,y denotes the indices of tiles that have an exclusion
* [in]startTileIndex - index into first tile to be processed * [in]startTileIndex - index into first tile to be processed
* [in]numTileIndices - number of tiles this context is responsible for processing * [in]numTileIndices - number of tiles this context is responsible for processing
* [in]ushort2 tiles - x component lists the tiles that interact with each tile * [in]int tiles - the atom block for each tile
* - y component not used currently
* [in]interactionCount - total number of tiles that have an interaction * [in]interactionCount - total number of tiles that have an interaction
* [in]maxTiles - stores the size of the neighbourlist in case it needs * [in]maxTiles - stores the size of the neighbourlist in case it needs
* - to be expanded * - to be expanded
...@@ -104,7 +103,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -104,7 +103,7 @@ extern "C" __global__ void computeNonbonded(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions, unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer, const real4* __restrict__ posq, const tileflags* __restrict__ exclusions,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms unsigned int maxTiles, const real4* __restrict__ blockCenter, const real4* __restrict__ blockSize, const unsigned int* __restrict__ interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
...@@ -338,8 +337,7 @@ extern "C" __global__ void computeNonbonded( ...@@ -338,8 +337,7 @@ extern "C" __global__ void computeNonbonded(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -272,7 +272,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) { ...@@ -272,7 +272,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if (maxTiles < 1) if (maxTiles < 1)
maxTiles = 1; maxTiles = 1;
int numAtoms = context.getNumAtoms(); int numAtoms = context.getNumAtoms();
interactingTiles = OpenCLArray::create<mm_ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
interactionCount = OpenCLArray::create<cl_uint>(context, 1, "interactionCount"); interactionCount = OpenCLArray::create<cl_uint>(context, 1, "interactionCount");
int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float)); int elementSize = (context.getUseDoublePrecision() ? sizeof(cl_double) : sizeof(cl_float));
...@@ -423,7 +423,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() { ...@@ -423,7 +423,7 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
delete interactingAtoms; delete interactingAtoms;
interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails interactingTiles = NULL; // Avoid an error in the destructor if the following allocation fails
interactingAtoms = NULL; interactingAtoms = NULL;
interactingTiles = OpenCLArray::create<mm_ushort2>(context, maxTiles, "interactingTiles"); interactingTiles = OpenCLArray::create<cl_int>(context, maxTiles, "interactingTiles");
interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms"); interactingAtoms = OpenCLArray::create<cl_int>(context, OpenCLContext::TileSize*maxTiles, "interactingAtoms");
forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(7, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(11, maxTiles); forceKernel.setArg<cl_uint>(11, maxTiles);
......
...@@ -20,7 +20,7 @@ __kernel void computeN2Energy( ...@@ -20,7 +20,7 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles, __global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -204,8 +204,7 @@ __kernel void computeN2Energy( ...@@ -204,8 +204,7 @@ __kernel void computeN2Energy(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -20,7 +20,7 @@ __kernel void computeN2Energy( ...@@ -20,7 +20,7 @@ __kernel void computeN2Energy(
__global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions, __global const real4* restrict posq, __local real4* restrict local_posq, __global const unsigned int* restrict exclusions,
__global const ushort2* exclusionTiles, __global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -220,8 +220,7 @@ __kernel void computeN2Energy( ...@@ -220,8 +220,7 @@ __kernel void computeN2Energy(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif #endif
__local real* restrict local_value, __local real* restrict local_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -178,8 +178,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -178,8 +178,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -14,7 +14,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#endif #endif
__local real* restrict local_value, __local real* restrict local_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
...@@ -188,8 +188,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -188,8 +188,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -98,8 +98,8 @@ void prefixSum(__local short* sum, __local ushort2* temp) { ...@@ -98,8 +98,8 @@ void prefixSum(__local short* sum, __local ushort2* temp) {
* This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions * This is called by findBlocksWithInteractions(). It compacts the list of blocks, identifies interactions
* in them, and writes the result to global memory. * in them, and writes the result to global memory.
*/ */
void storeInteractionData(unsigned short x, __local unsigned short* buffer, __local short* sum, __local ushort2* temp, __local int* atoms, __local int* numAtoms, void storeInteractionData(int x, __local unsigned short* buffer, __local short* sum, __local ushort2* temp, __local int* atoms, __local int* numAtoms,
__local int* baseIndex, __global unsigned int* interactionCount, __global ushort2* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize, __local int* baseIndex, __global unsigned int* interactionCount, __global int* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize,
real4 invPeriodicBoxSize, __global const real4* posq, __local real4* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) { real4 invPeriodicBoxSize, __global const real4* posq, __local real4* posBuffer, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF && const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
...@@ -192,7 +192,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo ...@@ -192,7 +192,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
*numAtoms = atomsToStore-tilesToStore*TILE_SIZE; *numAtoms = atomsToStore-tilesToStore*TILE_SIZE;
if (*baseIndex+tilesToStore <= maxTiles) { if (*baseIndex+tilesToStore <= maxTiles) {
if (get_local_id(0) < tilesToStore) if (get_local_id(0) < tilesToStore)
interactingTiles[*baseIndex+get_local_id(0)] = (ushort2) (x, singlePeriodicCopy); interactingTiles[*baseIndex+get_local_id(0)] = x;
for (int i = get_local_id(0); i < tilesToStore*TILE_SIZE; i += get_local_size(0)) for (int i = get_local_id(0); i < tilesToStore*TILE_SIZE; i += get_local_size(0))
interactingAtoms[*baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS); interactingAtoms[*baseIndex*TILE_SIZE+i] = (i < atomsToStore ? atoms[i] : NUM_ATOMS);
} }
...@@ -216,7 +216,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo ...@@ -216,7 +216,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (*baseIndex < maxTiles) { if (*baseIndex < maxTiles) {
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
interactingTiles[*baseIndex] = (ushort2) (x, singlePeriodicCopy); interactingTiles[*baseIndex] = x;
if (get_local_id(0) < TILE_SIZE) if (get_local_id(0) < TILE_SIZE)
interactingAtoms[*baseIndex*TILE_SIZE+get_local_id(0)] = (get_local_id(0) < *numAtoms ? atoms[get_local_id(0)] : NUM_ATOMS); interactingAtoms[*baseIndex*TILE_SIZE+get_local_id(0)] = (get_local_id(0) < *numAtoms ? atoms[get_local_id(0)] : NUM_ATOMS);
} }
...@@ -234,7 +234,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo ...@@ -234,7 +234,7 @@ void storeInteractionData(unsigned short x, __local unsigned short* buffer, __lo
* mark them as non-interacting. * mark them as non-interacting.
*/ */
__kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global unsigned int* restrict interactionCount, __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global unsigned int* restrict interactionCount,
__global ushort2* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex, __global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex,
unsigned int numBlocks, __global real2* restrict sortedBlocks, __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox, unsigned int numBlocks, __global real2* restrict sortedBlocks, __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox,
__global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions, __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions,
__global const int* restrict rebuildNeighborList) { __global const int* restrict rebuildNeighborList) {
...@@ -272,7 +272,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -272,7 +272,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
if (get_local_id(0) == get_local_size(0)-1) if (get_local_id(0) == get_local_size(0)-1)
numAtoms = 0; numAtoms = 0;
real2 sortedKey = sortedBlocks[i]; real2 sortedKey = sortedBlocks[i];
unsigned short x = (unsigned short) sortedKey.y; int x = (int) sortedKey.y;
real4 blockCenterX = sortedBlockCenter[i]; real4 blockCenterX = sortedBlockCenter[i];
real4 blockSizeX = sortedBlockBoundingBox[i]; real4 blockSizeX = sortedBlockBoundingBox[i];
......
...@@ -21,7 +21,7 @@ __kernel void computeBornSum( ...@@ -21,7 +21,7 @@ __kernel void computeBornSum(
#endif #endif
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -190,8 +190,7 @@ __kernel void computeBornSum( ...@@ -190,8 +190,7 @@ __kernel void computeBornSum(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
...@@ -391,7 +390,7 @@ __kernel void computeGBSAForce1( ...@@ -391,7 +390,7 @@ __kernel void computeGBSAForce1(
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -568,8 +567,7 @@ __kernel void computeGBSAForce1( ...@@ -568,8 +567,7 @@ __kernel void computeGBSAForce1(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -20,7 +20,7 @@ __kernel void computeBornSum( ...@@ -20,7 +20,7 @@ __kernel void computeBornSum(
#endif #endif
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -196,8 +196,7 @@ __kernel void computeBornSum( ...@@ -196,8 +196,7 @@ __kernel void computeBornSum(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
...@@ -412,7 +411,7 @@ __kernel void computeGBSAForce1( ...@@ -412,7 +411,7 @@ __kernel void computeGBSAForce1(
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
...@@ -603,8 +602,7 @@ __kernel void computeGBSAForce1( ...@@ -603,8 +602,7 @@ __kernel void computeGBSAForce1(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -25,7 +25,7 @@ __kernel void computeNonbonded( ...@@ -25,7 +25,7 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
...@@ -217,8 +217,7 @@ __kernel void computeNonbonded( ...@@ -217,8 +217,7 @@ __kernel void computeNonbonded(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -22,7 +22,7 @@ __kernel void computeNonbonded( ...@@ -22,7 +22,7 @@ __kernel void computeNonbonded(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const unsigned int* restrict exclusions,
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const ushort2* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
...@@ -234,8 +234,7 @@ __kernel void computeNonbonded( ...@@ -234,8 +234,7 @@ __kernel void computeNonbonded(
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
......
...@@ -61,7 +61,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -61,7 +61,7 @@ extern "C" __global__ void computeElectrostatics(
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#endif #endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
...@@ -230,10 +230,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -230,10 +230,8 @@ extern "C" __global__ void computeElectrostatics(
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles)
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
}
else else
#endif #endif
{ {
......
...@@ -400,7 +400,7 @@ extern "C" __global__ void computeFixedField( ...@@ -400,7 +400,7 @@ extern "C" __global__ void computeFixedField(
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const ushort2* __restrict__ exclusionTiles, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const ushort2* __restrict__ exclusionTiles,
unsigned int startTileIndex, unsigned int numTileIndices, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK #elif defined USE_GK
const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers, const real* __restrict__ bornRadii, unsigned long long* __restrict__ gkFieldBuffers,
#endif #endif
...@@ -569,10 +569,8 @@ extern "C" __global__ void computeFixedField( ...@@ -569,10 +569,8 @@ extern "C" __global__ void computeFixedField(
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles)
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
}
else else
#endif #endif
{ {
......
...@@ -201,7 +201,7 @@ extern "C" __global__ void computeInducedField( ...@@ -201,7 +201,7 @@ extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles, unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const ushort2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK #elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS, unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii, const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
...@@ -339,10 +339,8 @@ extern "C" __global__ void computeInducedField( ...@@ -339,10 +339,8 @@ extern "C" __global__ void computeInducedField(
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles)
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
}
else else
#endif #endif
{ {
......
...@@ -184,7 +184,7 @@ extern "C" __global__ void computeElectrostatics( ...@@ -184,7 +184,7 @@ extern "C" __global__ void computeElectrostatics(
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices, const ushort2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms, const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#endif #endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
...@@ -312,10 +312,8 @@ extern "C" __global__ void computeElectrostatics( ...@@ -312,10 +312,8 @@ extern "C" __global__ void computeElectrostatics(
unsigned int x, y; unsigned int x, y;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles)
ushort2 tileIndices = tiles[pos]; x = tiles[pos];
x = tileIndices.x;
}
else else
#endif #endif
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment