Commit 0f76088d authored by peastman
Browse files

Merge pull request #866 from peastman/cpu

More fixes to OpenCL with CPU device
parents dffbb5e4 85669a54
...@@ -227,7 +227,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea ...@@ -227,7 +227,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
int start = block2*TILE_SIZE; int start = block2*TILE_SIZE;
int included[TILE_SIZE]; int included[TILE_SIZE];
int numIncluded = 0; int numIncluded = 0;
SYNC_WARPS;
positionCache[get_local_id(0)] = posq[start+indexInWarp]; positionCache[get_local_id(0)] = posq[start+indexInWarp];
SYNC_WARPS;
if (atom1 < NUM_ATOMS) { if (atom1 < NUM_ATOMS) {
for (int j = 0; j < 32; j++) { for (int j = 0; j < 32; j++) {
int atom2 = start+j; int atom2 = start+j;
......
...@@ -21,7 +21,8 @@ __kernel void computeBornSum( ...@@ -21,7 +21,8 @@ __kernel void computeBornSum(
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -62,7 +63,7 @@ __kernel void computeBornSum( ...@@ -62,7 +63,7 @@ __kernel void computeBornSum(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -111,7 +112,7 @@ __kernel void computeBornSum( ...@@ -111,7 +112,7 @@ __kernel void computeBornSum(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -253,14 +254,13 @@ __kernel void computeBornSum( ...@@ -253,14 +254,13 @@ __kernel void computeBornSum(
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) { for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
localData[tgx].x -= floor((localData[tgx].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
localData[tgx].y -= floor((localData[tgx].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[tgx].z -= floor((localData[tgx].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
} }
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) { for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real bornSum = 0; real bornSum = 0;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float2 params1 = global_params[atom1]; float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
...@@ -321,7 +321,7 @@ __kernel void computeBornSum( ...@@ -321,7 +321,7 @@ __kernel void computeBornSum(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j]; int atom2 = atomIndices[j];
...@@ -412,7 +412,8 @@ __kernel void computeGBSAForce1( ...@@ -412,7 +412,8 @@ __kernel void computeGBSAForce1(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -452,7 +453,7 @@ __kernel void computeGBSAForce1( ...@@ -452,7 +453,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -516,7 +517,7 @@ __kernel void computeGBSAForce1( ...@@ -516,7 +517,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -669,15 +670,13 @@ __kernel void computeGBSAForce1( ...@@ -669,15 +670,13 @@ __kernel void computeGBSAForce1(
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) { for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
localData[tgx].x -= floor((localData[tgx].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
localData[tgx].y -= floor((localData[tgx].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[tgx].z -= floor((localData[tgx].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
} }
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) { for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0; real4 force = 0;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float bornRadius1 = global_bornRadii[atom1]; float bornRadius1 = global_bornRadii[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
...@@ -740,7 +739,7 @@ __kernel void computeGBSAForce1( ...@@ -740,7 +739,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j]; int atom2 = atomIndices[j];
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment