Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cd6cb66a
Commit
cd6cb66a
authored
Apr 28, 2010
by
Peter Eastman
Browse files
Restructured local memory in nonbonded kernel to reduce bank conflicts
parent
466f7ac1
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
63 additions
and
48 deletions
+63
-48
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+32
-29
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+14
-8
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+17
-11
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
cd6cb66a
...
...
@@ -282,16 +282,23 @@ void OpenCLNonbondedUtilities::computeInteractions() {
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
)
const
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
int
localDataSize
=
2
*
sizeof
(
cl_float4
);
stringstream
localData
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
localData
<<
params
[
i
].
getType
()
<<
" "
<<
params
[
i
].
getName
()
<<
";
\n
"
;
localDataSize
+=
params
[
i
].
getSize
();
}
if
((
localDataSize
/
4
)
%
2
==
0
)
{
localData
<<
"float padding;
\n
"
;
localDataSize
+=
4
;
}
replacements
[
"ATOM_PARAMETER_DATA"
]
=
localData
.
str
();
stringstream
args
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
args
<<
", __global "
;
args
<<
params
[
i
].
getType
();
args
<<
"* global_"
;
args
<<
params
[
i
].
getName
();
args
<<
", __local "
;
args
<<
params
[
i
].
getType
();
args
<<
"* local_"
;
args
<<
params
[
i
].
getName
();
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
{
if
(
arguments
[
i
].
getMemory
().
getInfo
<
CL_MEM_TYPE
>
()
==
CL_MEM_OBJECT_IMAGE2D
)
{
...
...
@@ -311,18 +318,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
stringstream
loadLocal1
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
loadLocal1
<<
"local
_
"
;
loadLocal1
<<
"local
Data[get_local_id(0)].
"
;
loadLocal1
<<
params
[
i
].
getName
();
loadLocal1
<<
"
[get_local_id(0)]
= "
;
loadLocal1
<<
" = "
;
loadLocal1
<<
params
[
i
].
getName
();
loadLocal1
<<
"1;
\n
"
;
}
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_1"
]
=
loadLocal1
.
str
();
stringstream
loadLocal2
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
loadLocal2
<<
"local
_
"
;
loadLocal2
<<
"local
Data[get_local_id(0)].
"
;
loadLocal2
<<
params
[
i
].
getName
();
loadLocal2
<<
"
[get_local_id(0)]
= global_"
;
loadLocal2
<<
" = global_"
;
loadLocal2
<<
params
[
i
].
getName
();
loadLocal2
<<
"[j];
\n
"
;
}
...
...
@@ -342,9 +349,9 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
load2j
<<
params
[
i
].
getType
();
load2j
<<
" "
;
load2j
<<
params
[
i
].
getName
();
load2j
<<
"2 = local
_
"
;
load2j
<<
"2 = local
Data[atom2].
"
;
load2j
<<
params
[
i
].
getName
();
load2j
<<
"
[atom2]
;
\n
"
;
load2j
<<
";
\n
"
;
}
replacements
[
"LOAD_ATOM2_PARAMETERS"
]
=
load2j
.
str
();
map
<
string
,
string
>
defines
;
...
...
@@ -374,32 +381,28 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
// Set arguments to the Kernel.
kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getForceBuffers
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
1
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
3
,
exclusions
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
4
,
exclusionIndex
->
getDeviceBuffer
());
kernel
.
setArg
(
5
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
(
7
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
int
paramBase
=
10
;
int
index
=
0
;
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getForceBuffers
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusions
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionIndex
->
getDeviceBuffer
());
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
(
2
*
sizeof
(
cl_float4
)
+
localDataSize
),
NULL
);
kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
if
(
useCutoff
)
{
paramBase
=
11
;
kernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
9
,
interactionFlags
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
10
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionFlags
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
}
else
{
kernel
.
setArg
<
cl
::
Buffer
>
(
8
,
tiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl_uint
>
(
9
,
tiles
->
getSize
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
tiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl_uint
>
(
index
++
,
tiles
->
getSize
());
}
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
kernel
.
setArg
<
cl
::
Memory
>
(
i
*
2
+
paramBase
,
params
[
i
].
getMemory
());
kernel
.
setArg
(
i
*
2
+
paramBase
+
1
,
OpenCLContext
::
ThreadBlockSize
*
params
[
i
].
getSize
(),
NULL
);
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
params
[
i
].
getMemory
());
}
paramBase
+=
2
*
params
.
size
();
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
{
kernel
.
setArg
<
cl
::
Memory
>
(
i
+
paramBase
,
arguments
[
i
].
getMemory
());
kernel
.
setArg
<
cl
::
Memory
>
(
i
ndex
++
,
arguments
[
i
].
getMemory
());
}
return
kernel
;
}
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
cd6cb66a
#
define
TILE_SIZE
32
typedef
struct
{
float4
posq
;
float4
force
;
ATOM_PARAMETER_DATA
}
AtomData
;
/**
*
Compute
nonbonded
interactions.
*/
__kernel
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__local
flo
at
4
*
local
_posq,
__local
float4*
local_force
,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
unsigned
int*
exclusionIndices,
__local
AtomD
at
a
*
local
Data
,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
#
else
...
...
@@ -36,7 +42,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local
_posq
[get_local_id
(
0
)
]
=
posq1
;
local
Data
[get_local_id
(
0
)
]
.posq
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
unsigned
int
xi
=
x/TILE_SIZE
;
...
...
@@ -49,7 +55,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+j
;
float4
posq2
=
local
_posq
[atom2]
;
float4
posq2
=
local
Data
[atom2]
.posq
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -89,10 +95,10 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y
+
tgx
;
local
_posq
[get_local_id
(
0
)
]
=
posq[j]
;
local
Data
[get_local_id
(
0
)
]
.posq
=
posq[j]
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
local
_force
[get_local_id
(
0
)
]
=
0.0f
;
local
Data
[get_local_id
(
0
)
]
.force
=
0.0f
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compute
the
full
set
of
interactions
in
this
tile.
...
...
@@ -111,7 +117,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+tj
;
float4
posq2
=
local
_posq
[atom2]
;
float4
posq2
=
local
Data
[atom2]
.posq
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -129,7 +135,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
local
_force
[baseLocalAtom+tj+forceBufferOffset].xyz
+=
delta.xyz
;
local
Data
[baseLocalAtom+tj+forceBufferOffset].
force.
xyz
+=
delta.xyz
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
excl
>>=
1
;
tj
=
(
tj+1
)
%
(
TILE_SIZE/2
)
;
...
...
@@ -149,7 +155,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
=
forceBuffers[offset1].xyz+force.xyz+tempBuffer[get_local_id
(
0
)
+TILE_SIZE].xyz
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+local
_force
[get_local_id
(
0
)
].xyz+local
_force
[get_local_id
(
0
)
+TILE_SIZE].xyz
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+local
Data
[get_local_id
(
0
)
].
force.
xyz+local
Data
[get_local_id
(
0
)
+TILE_SIZE].
force.
xyz
;
}
lasty
=
y
;
}
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
cd6cb66a
#
define
TILE_SIZE
32
typedef
struct
{
float4
posq
;
float4
force
;
ATOM_PARAMETER_DATA
}
AtomData
;
/**
*
Compute
nonbonded
interactions.
*/
__kernel
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__local
flo
at
4
*
local
_posq,
__local
float4*
local_force
,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
unsigned
int*
exclusionIndices,
__local
AtomD
at
a
*
local
Data
,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
#
else
...
...
@@ -37,7 +43,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local
_posq
[get_local_id
(
0
)
]
=
posq1
;
local
Data
[get_local_id
(
0
)
]
.posq
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
...
...
@@ -49,7 +55,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
tbx+j
;
float4
posq2
=
local
_posq
[atom2]
;
float4
posq2
=
local
Data
[atom2]
.posq
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -83,13 +89,13 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
local
_posq
[get_local_id
(
0
)
]
=
posq[j]
;
local
Data
[get_local_id
(
0
)
]
.posq
=
posq[j]
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
local
_force
[get_local_id
(
0
)
]
=
0.0f
;
local
Data
[get_local_id
(
0
)
]
.force
=
0.0f
;
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
&&
flags
==
0
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
}
...
...
@@ -100,7 +106,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
((
flags&
(
1<<j
))
!=
0
)
{
bool
isExcluded
=
false
;
int
atom2
=
tbx+j
;
float4
posq2
=
local
_posq
[atom2]
;
float4
posq2
=
local
Data
[atom2]
.posq
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -131,7 +137,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
if
(
tgx
%
16
==
0
)
tempBuffer[get_local_id
(
0
)
].xyz
+=
tempBuffer[get_local_id
(
0
)
+8].xyz
;
if
(
tgx
==
0
)
local
_force
[tbx+j].xyz
+=
tempBuffer[get_local_id
(
0
)
].xyz
+
tempBuffer[get_local_id
(
0
)
+16].xyz
;
local
Data
[tbx+j].
force.
xyz
+=
tempBuffer[get_local_id
(
0
)
].xyz
+
tempBuffer[get_local_id
(
0
)
+16].xyz
;
}
}
}
...
...
@@ -154,7 +160,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
tbx+tj
;
float4
posq2
=
local
_posq
[atom2]
;
float4
posq2
=
local
Data
[atom2]
.posq
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -172,7 +178,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
local
_force
[tbx+tj].xyz
+=
delta.xyz
;
local
Data
[tbx+tj].
force.
xyz
+=
delta.xyz
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
}
...
...
@@ -187,7 +193,7 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
unsigned
int
offset2
=
y
+
tgx
+
warp*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset2].xyz
+=
local
_force
[get_local_id
(
0
)
].xyz
;
forceBuffers[offset2].xyz
+=
local
Data
[get_local_id
(
0
)
].
force.
xyz
;
lasty
=
y
;
}
pos++
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment