Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7b3a8266
Commit
7b3a8266
authored
Apr 28, 2010
by
Peter Eastman
Browse files
Reduced bank conflicts in nonbonded kernel
parent
cd6cb66a
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
50 additions
and
22 deletions
+50
-22
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+1
-1
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+22
-9
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+27
-12
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
7b3a8266
...
...
@@ -282,7 +282,7 @@ void OpenCLNonbondedUtilities::computeInteractions() {
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
)
const
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
int
localDataSize
=
2
*
sizeof
(
cl_float
4
);
int
localDataSize
=
7
*
sizeof
(
cl_float
);
stringstream
localData
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
localData
<<
params
[
i
].
getType
()
<<
" "
<<
params
[
i
].
getName
()
<<
";
\n
"
;
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
7b3a8266
#
define
TILE_SIZE
32
typedef
struct
{
float4
posq
;
float4
force
;
float
x,
y,
z
;
float
q
;
float
fx,
fy,
fz
;
ATOM_PARAMETER_DATA
}
AtomData
;
...
...
@@ -42,7 +43,10 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
localData[get_local_id
(
0
)
].posq
=
posq1
;
localData[get_local_id
(
0
)
].x
=
posq1.x
;
localData[get_local_id
(
0
)
].y
=
posq1.y
;
localData[get_local_id
(
0
)
].z
=
posq1.z
;
localData[get_local_id
(
0
)
].q
=
posq1.w
;
LOAD_LOCAL_PARAMETERS_FROM_1
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
unsigned
int
xi
=
x/TILE_SIZE
;
...
...
@@ -55,7 +59,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+j
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -95,10 +99,16 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y
+
tgx
;
localData[get_local_id
(
0
)
].posq
=
posq[j]
;
float4
tempPosq
=
posq[j]
;
localData[get_local_id
(
0
)
].x
=
tempPosq.x
;
localData[get_local_id
(
0
)
].y
=
tempPosq.y
;
localData[get_local_id
(
0
)
].z
=
tempPosq.z
;
localData[get_local_id
(
0
)
].q
=
tempPosq.w
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
localData[get_local_id
(
0
)
].force
=
0.0f
;
localData[get_local_id
(
0
)
].fx
=
0.0f
;
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compute
the
full
set
of
interactions
in
this
tile.
...
...
@@ -117,7 +127,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+tj
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -135,7 +145,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
localData[baseLocalAtom+tj+forceBufferOffset].force.xyz
+=
delta.xyz
;
localData[baseLocalAtom+tj+forceBufferOffset].fx
+=
delta.x
;
localData[baseLocalAtom+tj+forceBufferOffset].fy
+=
delta.y
;
localData[baseLocalAtom+tj+forceBufferOffset].fz
+=
delta.z
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
excl
>>=
1
;
tj
=
(
tj+1
)
%
(
TILE_SIZE/2
)
;
...
...
@@ -155,7 +167,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
=
forceBuffers[offset1].xyz+force.xyz+tempBuffer[get_local_id
(
0
)
+TILE_SIZE].xyz
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+localData[get_local_id
(
0
)
].force.xyz+localData[get_local_id
(
0
)
+TILE_SIZE].force.xyz
;
float4
sum
=
(
float4
)
(
localData[get_local_id
(
0
)
].fx+localData[get_local_id
(
0
)
+TILE_SIZE].fx,
localData[get_local_id
(
0
)
].fy+localData[get_local_id
(
0
)
+TILE_SIZE].fy,
localData[get_local_id
(
0
)
].fz+localData[get_local_id
(
0
)
+TILE_SIZE].fz,
0.0f
)
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+sum.xyz
;
}
lasty
=
y
;
}
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
7b3a8266
#
define
TILE_SIZE
32
typedef
struct
{
float4
posq
;
float4
force
;
float
x,
y,
z
;
float
q
;
float
fx,
fy,
fz
;
ATOM_PARAMETER_DATA
}
AtomData
;
...
...
@@ -43,7 +44,10 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
localData[get_local_id
(
0
)
].posq
=
posq1
;
localData[get_local_id
(
0
)
].x
=
posq1.x
;
localData[get_local_id
(
0
)
].y
=
posq1.y
;
localData[get_local_id
(
0
)
].z
=
posq1.z
;
localData[get_local_id
(
0
)
].q
=
posq1.w
;
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
...
...
@@ -55,7 +59,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
tbx+j
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -89,10 +93,16 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
localData[get_local_id
(
0
)
].posq
=
posq[j]
;
float4
tempPosq
=
posq[j]
;
localData[get_local_id
(
0
)
].x
=
tempPosq.x
;
localData[get_local_id
(
0
)
].y
=
tempPosq.y
;
localData[get_local_id
(
0
)
].z
=
tempPosq.z
;
localData[get_local_id
(
0
)
].q
=
tempPosq.w
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
localData[get_local_id
(
0
)
].force
=
0.0f
;
localData[get_local_id
(
0
)
].fx
=
0.0f
;
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
&&
flags
==
0
)
{
...
...
@@ -106,7 +116,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
((
flags&
(
1<<j
))
!=
0
)
{
bool
isExcluded
=
false
;
int
atom2
=
tbx+j
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -136,8 +146,11 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+4].xyz
;
if
(
tgx
%
16
==
0
)
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+8].xyz
;
if
(
tgx
==
0
)
localData[tbx+j].force.xyz
+=
tempBuffer[get_local_id
(
0
)
].xyz
+
tempBuffer[get_local_id
(
0
)
+16].xyz
;
if
(
tgx
==
0
)
{
localData[tbx+j].fx
+=
tempBuffer[get_local_id
(
0
)
].x
+
tempBuffer[get_local_id
(
0
)
+16].x
;
localData[tbx+j].fy
+=
tempBuffer[get_local_id
(
0
)
].y
+
tempBuffer[get_local_id
(
0
)
+16].y
;
localData[tbx+j].fz
+=
tempBuffer[get_local_id
(
0
)
].z
+
tempBuffer[get_local_id
(
0
)
+16].z
;
}
}
}
}
...
...
@@ -160,7 +173,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
tbx+tj
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -178,7 +191,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
localData[tbx+tj].force.xyz
+=
delta.xyz
;
localData[tbx+tj].fx
+=
delta.x
;
localData[tbx+tj].fy
+=
delta.y
;
localData[tbx+tj].fz
+=
delta.z
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
}
...
...
@@ -193,7 +208,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned
int
offset2
=
y
+
tgx
+
warp*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset2]
.xyz
+=
localData[get_local_id
(
0
)
].f
orce.xyz
;
forceBuffers[offset2]
+=
(
float4
)
(
localData[get_local_id
(
0
)
].f
x,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
lasty
=
y
;
}
pos++
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment