Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0f76088d
Commit
0f76088d
authored
Apr 01, 2015
by
peastman
Browse files
Merge pull request #866 from peastman/cpu
More fixes to OpenCL with CPU device
parents
dffbb5e4
85669a54
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
18 deletions
+19
-18
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+3
-1
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+16
-17
No files found.
platforms/opencl/src/kernels/customManyParticle.cl
View file @
0f76088d
...
@@ -227,7 +227,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
...
@@ -227,7 +227,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
int
start
=
block2*TILE_SIZE
;
int
start
=
block2*TILE_SIZE
;
int
included[TILE_SIZE]
;
int
included[TILE_SIZE]
;
int
numIncluded
=
0
;
int
numIncluded
=
0
;
SYNC_WARPS
;
positionCache[get_local_id
(
0
)
]
=
posq[start+indexInWarp]
;
positionCache[get_local_id
(
0
)
]
=
posq[start+indexInWarp]
;
SYNC_WARPS
;
if
(
atom1
<
NUM_ATOMS
)
{
if
(
atom1
<
NUM_ATOMS
)
{
for
(
int
j
=
0
; j < 32; j++) {
for
(
int
j
=
0
; j < 32; j++) {
int
atom2
=
start+j
;
int
atom2
=
start+j
;
...
@@ -287,7 +289,7 @@ __kernel void computeNeighborStartIndices(__global int* restrict numNeighborsFor
...
@@ -287,7 +289,7 @@ __kernel void computeNeighborStartIndices(__global int* restrict numNeighborsFor
unsigned
int
globalIndex
=
startAtom+get_local_id
(
0
)
;
unsigned
int
globalIndex
=
startAtom+get_local_id
(
0
)
;
posBuffer[get_local_id
(
0
)
]
=
(
globalIndex
<
NUM_ATOMS
?
numNeighborsForAtom[globalIndex]
:
0
)
;
posBuffer[get_local_id
(
0
)
]
=
(
globalIndex
<
NUM_ATOMS
?
numNeighborsForAtom[globalIndex]
:
0
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Perform
a
parallel
prefix
sum.
//
Perform
a
parallel
prefix
sum.
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
0f76088d
...
@@ -20,8 +20,9 @@ __kernel void computeBornSum(
...
@@ -20,8 +20,9 @@ __kernel void computeBornSum(
#
endif
#
endif
__global
const
real4*
restrict
posq,
__global
const
float2*
restrict
global_params,
__global
const
real4*
restrict
posq,
__global
const
float2*
restrict
global_params,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
#
else
#
else
unsigned
int
numTiles,
unsigned
int
numTiles,
#
endif
#
endif
...
@@ -62,7 +63,7 @@ __kernel void computeBornSum(
...
@@ -62,7 +63,7 @@ __kernel void computeBornSum(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -111,7 +112,7 @@ __kernel void computeBornSum(
...
@@ -111,7 +112,7 @@ __kernel void computeBornSum(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -253,14 +254,13 @@ __kernel void computeBornSum(
...
@@ -253,14 +254,13 @@ __kernel void computeBornSum(
real4 blockCenterX = blockCenter[x];
real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
localData[tgx].x -= floor((localData[tgx].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
localData[tgx].y -= floor((localData[tgx].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[tgx].z -= floor((localData[tgx].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
}
}
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx;
unsigned int atom1 = x*TILE_SIZE+tgx;
real bornSum = 0;
real bornSum = 0;
real4 posq1 = posq[atom1];
real4 posq1 = posq[atom1];
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float2 params1 = global_params[atom1];
float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
...
@@ -321,7 +321,7 @@ __kernel void computeBornSum(
...
@@ -321,7 +321,7 @@ __kernel void computeBornSum(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j];
int atom2 = atomIndices[j];
...
@@ -411,8 +411,9 @@ __kernel void computeGBSAForce1(
...
@@ -411,8 +411,9 @@ __kernel void computeGBSAForce1(
#endif
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else
#else
unsigned int numTiles,
unsigned int numTiles,
#endif
#endif
...
@@ -452,7 +453,7 @@ __kernel void computeGBSAForce1(
...
@@ -452,7 +453,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -516,7 +517,7 @@ __kernel void computeGBSAForce1(
...
@@ -516,7 +517,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -669,15 +670,13 @@ __kernel void computeGBSAForce1(
...
@@ -669,15 +670,13 @@ __kernel void computeGBSAForce1(
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
localData[tgx].x
-=
floor
((
localData[tgx].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData[tgx],
blockCenterX
)
localData[tgx].y
-=
floor
((
localData[tgx].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
localData[tgx].z
-=
floor
((
localData[tgx].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
}
}
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real4
force
=
0
;
real4
force
=
0
;
real4
posq1
=
posq[atom1]
;
real4
posq1
=
posq[atom1]
;
posq1.xyz
-=
floor
(
(
posq1
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
float
bornRadius1
=
global_bornRadii[atom1]
;
float
bornRadius1
=
global_bornRadii[atom1]
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
...
@@ -740,7 +739,7 @@ __kernel void computeGBSAForce1(
...
@@ -740,7 +739,7 @@ __kernel void computeGBSAForce1(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
int
atom2
=
atomIndices[j]
;
int
atom2
=
atomIndices[j]
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment