Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
85669a54
Commit
85669a54
authored
Apr 01, 2015
by
peastman
Browse files
More fixes to OpenCL with CPU device
parent
dffbb5e4
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
18 deletions
+19
-18
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+3
-1
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+16
-17
No files found.
platforms/opencl/src/kernels/customManyParticle.cl
View file @
85669a54
...
@@ -227,7 +227,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
...
@@ -227,7 +227,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
int
start
=
block2*TILE_SIZE
;
int
start
=
block2*TILE_SIZE
;
int
included[TILE_SIZE]
;
int
included[TILE_SIZE]
;
int
numIncluded
=
0
;
int
numIncluded
=
0
;
SYNC_WARPS
;
positionCache[get_local_id
(
0
)
]
=
posq[start+indexInWarp]
;
positionCache[get_local_id
(
0
)
]
=
posq[start+indexInWarp]
;
SYNC_WARPS
;
if
(
atom1
<
NUM_ATOMS
)
{
if
(
atom1
<
NUM_ATOMS
)
{
for
(
int
j
=
0
; j < 32; j++) {
for
(
int
j
=
0
; j < 32; j++) {
int
atom2
=
start+j
;
int
atom2
=
start+j
;
...
@@ -287,7 +289,7 @@ __kernel void computeNeighborStartIndices(__global int* restrict numNeighborsFor
...
@@ -287,7 +289,7 @@ __kernel void computeNeighborStartIndices(__global int* restrict numNeighborsFor
unsigned
int
globalIndex
=
startAtom+get_local_id
(
0
)
;
unsigned
int
globalIndex
=
startAtom+get_local_id
(
0
)
;
posBuffer[get_local_id
(
0
)
]
=
(
globalIndex
<
NUM_ATOMS
?
numNeighborsForAtom[globalIndex]
:
0
)
;
posBuffer[get_local_id
(
0
)
]
=
(
globalIndex
<
NUM_ATOMS
?
numNeighborsForAtom[globalIndex]
:
0
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Perform
a
parallel
prefix
sum.
//
Perform
a
parallel
prefix
sum.
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
85669a54
...
@@ -20,8 +20,9 @@ __kernel void computeBornSum(
...
@@ -20,8 +20,9 @@ __kernel void computeBornSum(
#
endif
#
endif
__global
const
real4*
restrict
posq,
__global
const
float2*
restrict
global_params,
__global
const
real4*
restrict
posq,
__global
const
float2*
restrict
global_params,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
#
else
#
else
unsigned
int
numTiles,
unsigned
int
numTiles,
#
endif
#
endif
...
@@ -62,7 +63,7 @@ __kernel void computeBornSum(
...
@@ -62,7 +63,7 @@ __kernel void computeBornSum(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -111,7 +112,7 @@ __kernel void computeBornSum(
...
@@ -111,7 +112,7 @@ __kernel void computeBornSum(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -253,14 +254,13 @@ __kernel void computeBornSum(
...
@@ -253,14 +254,13 @@ __kernel void computeBornSum(
real4 blockCenterX = blockCenter[x];
real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
localData[tgx].x -= floor((localData[tgx].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
localData[tgx].y -= floor((localData[tgx].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[tgx].z -= floor((localData[tgx].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
}
}
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx;
unsigned int atom1 = x*TILE_SIZE+tgx;
real bornSum = 0;
real bornSum = 0;
real4 posq1 = posq[atom1];
real4 posq1 = posq[atom1];
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float2 params1 = global_params[atom1];
float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
...
@@ -321,7 +321,7 @@ __kernel void computeBornSum(
...
@@ -321,7 +321,7 @@ __kernel void computeBornSum(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j];
int atom2 = atomIndices[j];
...
@@ -411,8 +411,9 @@ __kernel void computeGBSAForce1(
...
@@ -411,8 +411,9 @@ __kernel void computeGBSAForce1(
#endif
#endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else
#else
unsigned int numTiles,
unsigned int numTiles,
#endif
#endif
...
@@ -452,7 +453,7 @@ __kernel void computeGBSAForce1(
...
@@ -452,7 +453,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -516,7 +517,7 @@ __kernel void computeGBSAForce1(
...
@@ -516,7 +517,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -669,15 +670,13 @@ __kernel void computeGBSAForce1(
...
@@ -669,15 +670,13 @@ __kernel void computeGBSAForce1(
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
localData[tgx].x
-=
floor
((
localData[tgx].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData[tgx],
blockCenterX
)
localData[tgx].y
-=
floor
((
localData[tgx].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
localData[tgx].z
-=
floor
((
localData[tgx].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
}
}
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real4
force
=
0
;
real4
force
=
0
;
real4
posq1
=
posq[atom1]
;
real4
posq1
=
posq[atom1]
;
posq1.xyz
-=
floor
(
(
posq1
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
float
bornRadius1
=
global_bornRadii[atom1]
;
float
bornRadius1
=
global_bornRadii[atom1]
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
...
@@ -740,7 +739,7 @@ __kernel void computeGBSAForce1(
...
@@ -740,7 +739,7 @@ __kernel void computeGBSAForce1(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
int
atom2
=
atomIndices[j]
;
int
atom2
=
atomIndices[j]
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment