Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7b3a8266
"...src/SimTKReference/ReferenceProperDihedralBond.cpp" did not exist on "76e2849ccf0aea4dd118a77e8d7d7e66b1107ab0"
Commit
7b3a8266
authored
Apr 28, 2010
by
Peter Eastman
Browse files
Reduced bank conflicts in nonbonded kernel
parent
cd6cb66a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
50 additions
and
22 deletions
+50
-22
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+1
-1
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+22
-9
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+27
-12
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
7b3a8266
...
@@ -282,7 +282,7 @@ void OpenCLNonbondedUtilities::computeInteractions() {
...
@@ -282,7 +282,7 @@ void OpenCLNonbondedUtilities::computeInteractions() {
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
)
const
{
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
)
const
{
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
int
localDataSize
=
2
*
sizeof
(
cl_float
4
);
int
localDataSize
=
7
*
sizeof
(
cl_float
);
stringstream
localData
;
stringstream
localData
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
localData
<<
params
[
i
].
getType
()
<<
" "
<<
params
[
i
].
getName
()
<<
";
\n
"
;
localData
<<
params
[
i
].
getType
()
<<
" "
<<
params
[
i
].
getName
()
<<
";
\n
"
;
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
7b3a8266
#
define
TILE_SIZE
32
#
define
TILE_SIZE
32
typedef
struct
{
typedef
struct
{
float4
posq
;
float
x,
y,
z
;
float4
force
;
float
q
;
float
fx,
fy,
fz
;
ATOM_PARAMETER_DATA
ATOM_PARAMETER_DATA
}
AtomData
;
}
AtomData
;
...
@@ -42,7 +43,10 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -42,7 +43,10 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
x
==
y
)
{
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
//
This
tile
is
on
the
diagonal.
localData[get_local_id
(
0
)
].posq
=
posq1
;
localData[get_local_id
(
0
)
].x
=
posq1.x
;
localData[get_local_id
(
0
)
].y
=
posq1.y
;
localData[get_local_id
(
0
)
].z
=
posq1.z
;
localData[get_local_id
(
0
)
].q
=
posq1.w
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
xi
=
x/TILE_SIZE
;
...
@@ -55,7 +59,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -55,7 +59,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
#
endif
int
atom2
=
baseLocalAtom+j
;
int
atom2
=
baseLocalAtom+j
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
@@ -95,10 +99,16 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -95,10 +99,16 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y
+
tgx
;
unsigned
int
j
=
y
+
tgx
;
localData[get_local_id
(
0
)
].posq
=
posq[j]
;
float4
tempPosq
=
posq[j]
;
localData[get_local_id
(
0
)
].x
=
tempPosq.x
;
localData[get_local_id
(
0
)
].y
=
tempPosq.y
;
localData[get_local_id
(
0
)
].z
=
tempPosq.z
;
localData[get_local_id
(
0
)
].q
=
tempPosq.w
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
}
localData[get_local_id
(
0
)
].force
=
0.0f
;
localData[get_local_id
(
0
)
].fx
=
0.0f
;
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compute
the
full
set
of
interactions
in
this
tile.
//
Compute
the
full
set
of
interactions
in
this
tile.
...
@@ -117,7 +127,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -117,7 +127,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
#
endif
int
atom2
=
baseLocalAtom+tj
;
int
atom2
=
baseLocalAtom+tj
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
@@ -135,7 +145,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -135,7 +145,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
localData[baseLocalAtom+tj+forceBufferOffset].force.xyz
+=
delta.xyz
;
localData[baseLocalAtom+tj+forceBufferOffset].fx
+=
delta.x
;
localData[baseLocalAtom+tj+forceBufferOffset].fy
+=
delta.y
;
localData[baseLocalAtom+tj+forceBufferOffset].fz
+=
delta.z
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
excl
>>=
1
;
excl
>>=
1
;
tj
=
(
tj+1
)
%
(
TILE_SIZE/2
)
;
tj
=
(
tj+1
)
%
(
TILE_SIZE/2
)
;
...
@@ -155,7 +167,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -155,7 +167,8 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
#
endif
forceBuffers[offset1].xyz
=
forceBuffers[offset1].xyz+force.xyz+tempBuffer[get_local_id
(
0
)
+TILE_SIZE].xyz
;
forceBuffers[offset1].xyz
=
forceBuffers[offset1].xyz+force.xyz+tempBuffer[get_local_id
(
0
)
+TILE_SIZE].xyz
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+localData[get_local_id
(
0
)
].force.xyz+localData[get_local_id
(
0
)
+TILE_SIZE].force.xyz
;
float4
sum
=
(
float4
)
(
localData[get_local_id
(
0
)
].fx+localData[get_local_id
(
0
)
+TILE_SIZE].fx,
localData[get_local_id
(
0
)
].fy+localData[get_local_id
(
0
)
+TILE_SIZE].fy,
localData[get_local_id
(
0
)
].fz+localData[get_local_id
(
0
)
+TILE_SIZE].fz,
0.0f
)
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+sum.xyz
;
}
}
lasty
=
y
;
lasty
=
y
;
}
}
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
7b3a8266
#
define
TILE_SIZE
32
#
define
TILE_SIZE
32
typedef
struct
{
typedef
struct
{
float4
posq
;
float
x,
y,
z
;
float4
force
;
float
q
;
float
fx,
fy,
fz
;
ATOM_PARAMETER_DATA
ATOM_PARAMETER_DATA
}
AtomData
;
}
AtomData
;
...
@@ -43,7 +44,10 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -43,7 +44,10 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
x
==
y
)
{
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
//
This
tile
is
on
the
diagonal.
localData[get_local_id
(
0
)
].posq
=
posq1
;
localData[get_local_id
(
0
)
].x
=
posq1.x
;
localData[get_local_id
(
0
)
].y
=
posq1.y
;
localData[get_local_id
(
0
)
].z
=
posq1.z
;
localData[get_local_id
(
0
)
].q
=
posq1.w
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
...
@@ -55,7 +59,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -55,7 +59,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
#
endif
int
atom2
=
tbx+j
;
int
atom2
=
tbx+j
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
@@ -89,10 +93,16 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -89,10 +93,16 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
lasty
!=
y
)
{
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
unsigned
int
j
=
y
+
tgx
;
localData[get_local_id
(
0
)
].posq
=
posq[j]
;
float4
tempPosq
=
posq[j]
;
localData[get_local_id
(
0
)
].x
=
tempPosq.x
;
localData[get_local_id
(
0
)
].y
=
tempPosq.y
;
localData[get_local_id
(
0
)
].z
=
tempPosq.z
;
localData[get_local_id
(
0
)
].q
=
tempPosq.w
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
}
localData[get_local_id
(
0
)
].force
=
0.0f
;
localData[get_local_id
(
0
)
].fx
=
0.0f
;
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
&&
flags
==
0
)
{
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
&&
flags
==
0
)
{
...
@@ -106,7 +116,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -106,7 +116,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
((
flags&
(
1<<j
))
!=
0
)
{
if
((
flags&
(
1<<j
))
!=
0
)
{
bool
isExcluded
=
false
;
bool
isExcluded
=
false
;
int
atom2
=
tbx+j
;
int
atom2
=
tbx+j
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
@@ -136,8 +146,11 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -136,8 +146,11 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+4].xyz
;
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+4].xyz
;
if
(
tgx
%
16
==
0
)
if
(
tgx
%
16
==
0
)
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+8].xyz
;
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+8].xyz
;
if
(
tgx
==
0
)
if
(
tgx
==
0
)
{
localData[tbx+j].force.xyz
+=
tempBuffer[get_local_id
(
0
)
].xyz
+
tempBuffer[get_local_id
(
0
)
+16].xyz
;
localData[tbx+j].fx
+=
tempBuffer[get_local_id
(
0
)
].x
+
tempBuffer[get_local_id
(
0
)
+16].x
;
localData[tbx+j].fy
+=
tempBuffer[get_local_id
(
0
)
].y
+
tempBuffer[get_local_id
(
0
)
+16].y
;
localData[tbx+j].fz
+=
tempBuffer[get_local_id
(
0
)
].z
+
tempBuffer[get_local_id
(
0
)
+16].z
;
}
}
}
}
}
}
}
...
@@ -160,7 +173,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -160,7 +173,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
#
endif
int
atom2
=
tbx+tj
;
int
atom2
=
tbx+tj
;
float4
posq2
=
localData[atom2].
posq
;
float4
posq2
=
(
float4
)
(
localData[atom2].
x,
localData[atom2].y,
localData[atom2].z,
localData[atom2].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
@@ -178,7 +191,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -178,7 +191,9 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
localData[tbx+tj].force.xyz
+=
delta.xyz
;
localData[tbx+tj].fx
+=
delta.x
;
localData[tbx+tj].fy
+=
delta.y
;
localData[tbx+tj].fz
+=
delta.z
;
excl
>>=
1
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
}
}
...
@@ -193,7 +208,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
...
@@ -193,7 +208,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned
int
offset2
=
y
+
tgx
+
warp*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
warp*PADDED_NUM_ATOMS
;
#
endif
#
endif
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset2]
.xyz
+=
localData[get_local_id
(
0
)
].f
orce.xyz
;
forceBuffers[offset2]
+=
(
float4
)
(
localData[get_local_id
(
0
)
].f
x,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
lasty
=
y
;
lasty
=
y
;
}
}
pos++
;
pos++
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment