Commit dca54ec7 authored by Saurabh Belsare's avatar Saurabh Belsare
Browse files

Merged fork with latest original master

parents cace5edf 01f9e415
...@@ -5,6 +5,11 @@ const real PI = 3.14159265358979323846f; ...@@ -5,6 +5,11 @@ const real PI = 3.14159265358979323846f;
real4 v0a = (real4) (pos1.xyz-pos2.xyz, 0.0f); real4 v0a = (real4) (pos1.xyz-pos2.xyz, 0.0f);
real4 v1a = (real4) (pos3.xyz-pos2.xyz, 0.0f); real4 v1a = (real4) (pos3.xyz-pos2.xyz, 0.0f);
real4 v2a = (real4) (pos3.xyz-pos4.xyz, 0.0f); real4 v2a = (real4) (pos3.xyz-pos4.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0a)
APPLY_PERIODIC_TO_DELTA(v1a)
APPLY_PERIODIC_TO_DELTA(v2a)
#endif
real4 cp0a = cross(v0a, v1a); real4 cp0a = cross(v0a, v1a);
real4 cp1a = cross(v1a, v2a); real4 cp1a = cross(v1a, v2a);
real cosangle = dot(normalize(cp0a), normalize(cp1a)); real cosangle = dot(normalize(cp0a), normalize(cp1a));
...@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI); ...@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f); real4 v0b = (real4) (pos5.xyz-pos6.xyz, 0.0f);
real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f); real4 v1b = (real4) (pos7.xyz-pos6.xyz, 0.0f);
real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f); real4 v2b = (real4) (pos7.xyz-pos8.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0b)
APPLY_PERIODIC_TO_DELTA(v1b)
APPLY_PERIODIC_TO_DELTA(v2b)
#endif
real4 cp0b = cross(v0b, v1b); real4 cp0b = cross(v0b, v1b);
real4 cp1b = cross(v1b, v2b); real4 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b)); cosangle = dot(normalize(cp0b), normalize(cp1b));
......
...@@ -30,6 +30,10 @@ ...@@ -30,6 +30,10 @@
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI); tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += -prefactor*erfAlphaR; tempEnergy += -prefactor*erfAlphaR;
} }
else {
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
}
} }
else { else {
#if HAS_LENNARD_JONES #if HAS_LENNARD_JONES
......
...@@ -70,8 +70,11 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global ...@@ -70,8 +70,11 @@ __kernel void computeGroupCenters(__global const real4* restrict posq, __global
/** /**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude. * Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/ */
real4 delta(real4 vec1, real4 vec2) { real4 delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0); real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
if (periodic)
APPLY_PERIODIC_TO_DELTA(result);
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
} }
...@@ -110,7 +113,7 @@ real4 computeCross(real4 vec1, real4 vec2) { ...@@ -110,7 +113,7 @@ real4 computeCross(real4 vec1, real4 vec2) {
* Compute the forces on groups based on the bonds. * Compute the forces on groups based on the bonds.
*/ */
__kernel void computeGroupForces(__global long* restrict groupForce, __global mixed* restrict energyBuffer, __global const real4* restrict centerPositions, __kernel void computeGroupForces(__global long* restrict groupForce, __global mixed* restrict energyBuffer, __global const real4* restrict centerPositions,
__global const int* restrict bondGroups __global const int* restrict bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) { EXTRA_ARGS) {
mixed energy = 0; mixed energy = 0;
for (int index = get_global_id(0); index < NUM_BONDS; index += get_global_size(0)) { for (int index = get_global_id(0); index < NUM_BONDS; index += get_global_size(0)) {
......
/** /**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude. * Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/ */
real4 ccb_delta(real4 vec1, real4 vec2) { real4 ccb_delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0); real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
if (periodic)
APPLY_PERIODIC_TO_DELTA(result);
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
} }
......
...@@ -181,6 +181,8 @@ __kernel void computeN2Energy( ...@@ -181,6 +181,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else #else
...@@ -204,42 +206,38 @@ __kernel void computeN2Energy( ...@@ -204,42 +206,38 @@ __kernel void computeN2Energy(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS; SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS; ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos) else
currentSkipIndex++; skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos); skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
......
...@@ -201,6 +201,8 @@ __kernel void computeN2Energy( ...@@ -201,6 +201,8 @@ __kernel void computeN2Energy(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0]; const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else #else
...@@ -220,35 +222,31 @@ __kernel void computeN2Energy( ...@@ -220,35 +222,31 @@ __kernel void computeN2Energy(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) { while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) { if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++]; ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos); else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos);
#endif
if (includeTile) { if (includeTile) {
// Load the data for this tile. // Load the data for this tile.
......
...@@ -157,6 +157,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -157,6 +157,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else #else
...@@ -178,42 +180,38 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -178,42 +180,38 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS; SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS; ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos) else
currentSkipIndex++; skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos); skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
......
...@@ -170,6 +170,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -170,6 +170,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0]; const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else #else
...@@ -188,35 +190,31 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -188,35 +190,31 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) { while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) { if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++]; ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos); else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos);
#endif
if (includeTile) { if (includeTile) {
// Load the data for this tile. // Load the data for this tile.
......
...@@ -55,7 +55,7 @@ inline real4 computeCross(real4 vec1, real4 vec2) { ...@@ -55,7 +55,7 @@ inline real4 computeCross(real4 vec1, real4 vec2) {
/** /**
* Determine whether a particular interaction is in the list of exclusions. * Determine whether a particular interaction is in the list of exclusions.
*/ */
inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict exclusions, __global int* restrict exclusionStartIndex) { inline bool isInteractionExcluded(int atom1, int atom2, __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex) {
int first = exclusionStartIndex[atom1]; int first = exclusionStartIndex[atom1];
int last = exclusionStartIndex[atom1+1]; int last = exclusionStartIndex[atom1+1];
for (int i = last-1; i >= first; i--) { for (int i = last-1; i >= first; i--) {
...@@ -174,7 +174,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea ...@@ -174,7 +174,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
__global const real4* restrict posq, __global const real4* restrict blockCenter, __global const real4* restrict blockBoundingBox, __global int2* restrict neighborPairs, __global const real4* restrict posq, __global const real4* restrict blockCenter, __global const real4* restrict blockBoundingBox, __global int2* restrict neighborPairs,
__global int* restrict numNeighborPairs, __global int* restrict numNeighborsForAtom, int maxNeighborPairs __global int* restrict numNeighborPairs, __global int* restrict numNeighborsForAtom, int maxNeighborPairs
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
, __global int* restrict exclusions, __global int* restrict exclusionStartIndex , __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex
#endif #endif
) { ) {
__local real4 positionCache[FIND_NEIGHBORS_WORKGROUP_SIZE]; __local real4 positionCache[FIND_NEIGHBORS_WORKGROUP_SIZE];
...@@ -264,7 +264,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea ...@@ -264,7 +264,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
} }
} }
} }
numNeighborsForAtom[atom1] = totalNeighborsForAtom1; if (atom1 < NUM_ATOMS)
numNeighborsForAtom[atom1] = totalNeighborsForAtom1;
SYNC_WARPS;
} }
} }
...@@ -307,6 +309,7 @@ __kernel void computeNeighborStartIndices(__global int* restrict numNeighborsFor ...@@ -307,6 +309,7 @@ __kernel void computeNeighborStartIndices(__global int* restrict numNeighborsFor
numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter
} }
globalOffset += posBuffer[get_local_size(0)-1]; globalOffset += posBuffer[get_local_size(0)-1];
barrier(CLK_LOCAL_MEM_FENCE);
} }
if (get_local_id(0) == 0) if (get_local_id(0) == 0)
neighborStartIndex[0] = 0; neighborStartIndex[0] = 0;
......
...@@ -169,6 +169,8 @@ __kernel void computeBornSum( ...@@ -169,6 +169,8 @@ __kernel void computeBornSum(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else #else
...@@ -190,42 +192,38 @@ __kernel void computeBornSum( ...@@ -190,42 +192,38 @@ __kernel void computeBornSum(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS; SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS; ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos) else
currentSkipIndex++; skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos); skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
...@@ -556,6 +554,8 @@ __kernel void computeGBSAForce1( ...@@ -556,6 +554,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int pos = (int) (warp*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps); int end = (int) ((warp+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : (long)numTiles)/totalWarps);
#else #else
...@@ -577,42 +577,38 @@ __kernel void computeGBSAForce1( ...@@ -577,42 +577,38 @@ __kernel void computeGBSAForce1(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS; SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS; ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos) else
currentSkipIndex++; skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos); skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
......
...@@ -178,6 +178,8 @@ __kernel void computeBornSum( ...@@ -178,6 +178,8 @@ __kernel void computeBornSum(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else #else
...@@ -196,35 +198,31 @@ __kernel void computeBornSum( ...@@ -196,35 +198,31 @@ __kernel void computeBornSum(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) { while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) { if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++]; ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos); else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos);
#endif
if (includeTile) { if (includeTile) {
// Load the data for this tile. // Load the data for this tile.
...@@ -593,6 +591,8 @@ __kernel void computeGBSAForce1( ...@@ -593,6 +591,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int pos = (int) (get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0)); int end = (int) ((get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*((long)NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0));
#else #else
...@@ -611,35 +611,31 @@ __kernel void computeGBSAForce1( ...@@ -611,35 +611,31 @@ __kernel void computeGBSAForce1(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) { while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) { if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++]; ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos); else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos);
#endif
if (includeTile) { if (includeTile) {
// Load the data for this tile. // Load the data for this tile.
......
...@@ -200,6 +200,8 @@ __kernel void computeNonbonded( ...@@ -200,6 +200,8 @@ __kernel void computeNonbonded(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0]; unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? startTileIndex+warp*(long)numTileIndices/totalWarps : warp*(long)numTiles/totalWarps); int pos = (int) (numTiles > maxTiles ? startTileIndex+warp*(long)numTileIndices/totalWarps : warp*(long)numTiles/totalWarps);
int end = (int) (numTiles > maxTiles ? startTileIndex+(warp+1)*(long)numTileIndices/totalWarps : (warp+1)*(long)numTiles/totalWarps); int end = (int) (numTiles > maxTiles ? startTileIndex+(warp+1)*(long)numTileIndices/totalWarps : (warp+1)*(long)numTiles/totalWarps);
#else #else
...@@ -223,42 +225,38 @@ __kernel void computeNonbonded( ...@@ -223,42 +225,38 @@ __kernel void computeNonbonded(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) {
SYNC_WARPS; SYNC_WARPS;
while (skipTiles[tbx+TILE_SIZE-1] < pos) { if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) {
SYNC_WARPS; ushort2 tile = exclusionTiles[skipBase+tgx];
if (skipBase+tgx < NUM_TILES_WITH_EXCLUSIONS) { skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
ushort2 tile = exclusionTiles[skipBase+tgx];
skipTiles[get_local_id(0)] = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos) else
currentSkipIndex++; skipTiles[get_local_id(0)] = end;
includeTile = (skipTiles[currentSkipIndex] != pos); skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
} }
while (skipTiles[currentSkipIndex] < pos)
currentSkipIndex++;
includeTile = (skipTiles[currentSkipIndex] != pos);
#endif
if (includeTile) { if (includeTile) {
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
......
...@@ -214,6 +214,8 @@ __kernel void computeNonbonded( ...@@ -214,6 +214,8 @@ __kernel void computeNonbonded(
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0]; const unsigned int numTiles = interactionCount[0];
if (numTiles > maxTiles)
return; // There wasn't enough memory for the neighbor list.
int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0)); int pos = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+get_group_id(0)*(long)numTileIndices/get_num_groups(0)) : get_group_id(0)*(long)numTiles/get_num_groups(0));
int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0)); int end = (int) (numTiles > maxTiles ? (unsigned int) (startTileIndex+(get_group_id(0)+1)*(long)numTileIndices/get_num_groups(0)) : (get_group_id(0)+1)*(long)numTiles/get_num_groups(0));
#else #else
...@@ -234,35 +236,31 @@ __kernel void computeNonbonded( ...@@ -234,35 +236,31 @@ __kernel void computeNonbonded(
int x, y; int x, y;
bool singlePeriodicCopy = false; bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { x = tiles[pos];
x = tiles[pos]; real4 blockSizeX = blockSize[x];
real4 blockSizeX = blockSize[x]; singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF && 0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF); #else
} y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
else x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
#endif if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
{ y += (x < y ? -1 : 1);
y = (int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2); x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error. }
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
// Skip over tiles that have exclusions, since they were already processed. // Skip over tiles that have exclusions, since they were already processed.
while (nextToSkip < pos) { while (nextToSkip < pos) {
if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) { if (currentSkipIndex < NUM_TILES_WITH_EXCLUSIONS) {
ushort2 tile = exclusionTiles[currentSkipIndex++]; ushort2 tile = exclusionTiles[currentSkipIndex++];
nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2; nextToSkip = tile.x + tile.y*NUM_BLOCKS - tile.y*(tile.y+1)/2;
}
else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos); else
nextToSkip = end;
} }
includeTile = (nextToSkip != pos);
#endif
if (includeTile) { if (includeTile) {
// Load the data for this tile. // Load the data for this tile.
......
__kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache, __kernel void updateBsplines(__global const real4* restrict posq, __global real4* restrict pmeBsplineTheta, __local real4* restrict bsplinesCache,
__global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global int2* restrict pmeAtomGridIndex, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real4 scale = 1/(real) (PME_ORDER-1); const real4 scale = 1/(real) (PME_ORDER-1);
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
__local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER]; __local real4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
real4 pos = posq[i]; real4 pos = posq[i];
APPLY_PERIODIC_TO_POS(pos)
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -83,8 +85,9 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co ...@@ -83,8 +85,9 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global long* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
const real4 scale = 1/(real) (PME_ORDER-1); real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
// Process the atoms in spatially sorted order. This improves efficiency when writing // Process the atoms in spatially sorted order. This improves efficiency when writing
...@@ -93,9 +96,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -93,9 +96,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -118,7 +119,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -118,7 +119,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
data[j-1] = div*dr*data[j-2]; data[j-1] = div*dr*data[j-2];
for (int k = 1; k < (j-1); k++) for (int k = 1; k < (j-1); k++)
data[j-k-1] = div*((dr+(real4) k) *data[j-k-2] + (-dr+(real4) (j-k))*data[j-k-1]); data[j-k-1] = div*((dr+(real4) k) *data[j-k-2] + (-dr+(real4) (j-k))*data[j-k-1]);
data[0] = div*(- dr+1.0f)*data[0]; data[0] = div*(-dr+1.0f)*data[0];
} }
data[PME_ORDER-1] = scale*dr*data[PME_ORDER-2]; data[PME_ORDER-1] = scale*dr*data[PME_ORDER-2];
for (int j = 1; j < (PME_ORDER-1); j++) for (int j = 1; j < (PME_ORDER-1); j++)
...@@ -165,7 +166,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea ...@@ -165,7 +166,8 @@ __kernel void finishSpreadCharge(__global long* restrict fixedGrid, __global rea
} }
#elif defined(DEVICE_IS_CPU) #elif defined(DEVICE_IS_CPU)
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange, __kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) { __global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0); const int firstx = get_global_id(0)*GRID_SIZE_X/get_global_size(0);
const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0); const int lastx = (get_global_id(0)+1)*GRID_SIZE_X/get_global_size(0);
if (firstx == lastx) if (firstx == lastx)
...@@ -179,9 +181,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con ...@@ -179,9 +181,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
for (int i = 0; i < NUM_ATOMS; i++) { for (int i = 0; i < NUM_ATOMS; i++) {
int atom = i;//pmeAtomGridIndex[i].x; int atom = i;//pmeAtomGridIndex[i].x;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -362,12 +362,17 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe ...@@ -362,12 +362,17 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
energy += eterm*(grid.x*grid.x + grid.y*grid.y); energy += eterm*(grid.x*grid.x + grid.y*grid.y);
} }
} }
#ifdef USE_PME_STREAM
energyBuffer[get_global_id(0)] = 0.5f*energy; energyBuffer[get_global_id(0)] = 0.5f*energy;
#else
energyBuffer[get_global_id(0)] += 0.5f*energy;
#endif
} }
__kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid, __kernel void gridInterpolateForce(__global const real4* restrict posq, __global real4* restrict forceBuffers, __global const real* restrict pmeGrid,
real4 periodicBoxSize, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) { real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real4 recipBoxVecX,
const real4 scale = 1/(real) (PME_ORDER-1); real4 recipBoxVecY, real4 recipBoxVecZ, __global int2* restrict pmeAtomGridIndex) {
const real scale = 1/(real) (PME_ORDER-1);
real4 data[PME_ORDER]; real4 data[PME_ORDER];
real4 ddata[PME_ORDER]; real4 ddata[PME_ORDER];
...@@ -378,9 +383,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global ...@@ -378,9 +383,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
int atom = pmeAtomGridIndex[i].x; int atom = pmeAtomGridIndex[i].x;
real4 force = 0.0f; real4 force = 0.0f;
real4 pos = posq[atom]; real4 pos = posq[atom];
pos.x -= floor(pos.x*recipBoxVecX.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*recipBoxVecY.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*recipBoxVecZ.z)*periodicBoxSize.z;
real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x, real3 t = (real3) (pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y, pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
pos.z*recipBoxVecZ.z); pos.z*recipBoxVecZ.z);
...@@ -403,7 +406,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global ...@@ -403,7 +406,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
data[j-1] = div*dr*data[j-2]; data[j-1] = div*dr*data[j-2];
for (int k = 1; k < (j-1); k++) for (int k = 1; k < (j-1); k++)
data[j-k-1] = div*((dr+(real4) k) *data[j-k-2] + (-dr+(real4) (j-k))*data[j-k-1]); data[j-k-1] = div*((dr+(real4) k) *data[j-k-2] + (-dr+(real4) (j-k))*data[j-k-1]);
data[0] = div*(- dr+1.0f)*data[0]; data[0] = div*(-dr+1.0f)*data[0];
} }
ddata[0] = -data[0]; ddata[0] = -data[0];
for (int j = 1; j < PME_ORDER; j++) for (int j = 1; j < PME_ORDER; j++)
......
...@@ -2,6 +2,11 @@ const real PI = 3.14159265358979323846f; ...@@ -2,6 +2,11 @@ const real PI = 3.14159265358979323846f;
real4 v0 = (real4) (pos1.xyz-pos2.xyz, 0.0f); real4 v0 = (real4) (pos1.xyz-pos2.xyz, 0.0f);
real4 v1 = (real4) (pos3.xyz-pos2.xyz, 0.0f); real4 v1 = (real4) (pos3.xyz-pos2.xyz, 0.0f);
real4 v2 = (real4) (pos3.xyz-pos4.xyz, 0.0f); real4 v2 = (real4) (pos3.xyz-pos4.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
APPLY_PERIODIC_TO_DELTA(v2)
#endif
real4 cp0 = cross(v0, v1); real4 cp0 = cross(v0, v1);
real4 cp1 = cross(v1, v2); real4 cp1 = cross(v1, v2);
real cosangle = dot(normalize(cp0), normalize(cp1)); real cosangle = dot(normalize(cp0), normalize(cp1));
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2015 Stanford University and the Authors. * * Portions copyright (c) 2015-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -39,9 +39,9 @@ OpenMM::OpenCLPlatform platform; ...@@ -39,9 +39,9 @@ OpenMM::OpenCLPlatform platform;
void initializeTests(int argc, char* argv[]) { void initializeTests(int argc, char* argv[]) {
if (argc > 1) if (argc > 1)
platform.setPropertyDefaultValue("OpenCLPrecision", std::string(argv[1])); platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
if (argc > 2) if (argc > 2)
platform.setPropertyDefaultValue("OpenCLPlatformIndex", std::string(argv[2])); platform.setPropertyDefaultValue("OpenCLPlatformIndex", std::string(argv[2]));
if (argc > 3) if (argc > 3)
platform.setPropertyDefaultValue("OpenCLDeviceIndex", std::string(argv[3])); platform.setPropertyDefaultValue("DeviceIndex", std::string(argv[3]));
} }
...@@ -120,6 +120,15 @@ void testCheckpoint() { ...@@ -120,6 +120,15 @@ void testCheckpoint() {
integrator2.step(10); integrator2.step(10);
State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters); State s8 = context2.getState(State::Positions | State::Velocities | State::Parameters);
compareStates(s6, s8); compareStates(s6, s8);
// See if a checkpoint created from one Context can be loaded into a different one.
VerletIntegrator integrator3(0.001);
Context context3(system, integrator3, platform);
stream1.seekg(0, stream1.beg);
context3.loadCheckpoint(stream1);
State s9 = context3.getState(State::Positions | State::Velocities | State::Parameters | State::Energy);
compareStates(s1, s9);
} }
void runPlatformTests() { void runPlatformTests() {
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2011-2015 Stanford University and the Authors. * * Portions copyright (c) 2011-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -54,7 +54,7 @@ template <class Real2> ...@@ -54,7 +54,7 @@ template <class Real2>
void testTransform(bool realToComplex, int xsize, int ysize, int zsize) { void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false"); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. * * Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -54,7 +54,7 @@ void testGaussian() { ...@@ -54,7 +54,7 @@ void testGaussian() {
System system; System system;
for (int i = 0; i < numAtoms; i++) for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0); system.addParticle(1.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false"); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0); context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. * * Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) { ...@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System system; System system;
system.addParticle(0.0); system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false"); OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
OpenCLContext& context = *platformData.contexts[0]; OpenCLContext& context = *platformData.contexts[0];
context.initialize(); context.initialize();
OpenCLArray data(context, array.size(), sizeof(float), "sortData"); OpenCLArray data(context, array.size(), sizeof(float), "sortData");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment