Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b9b2b7a3
Commit
b9b2b7a3
authored
Sep 11, 2015
by
peastman
Browse files
Merge pull request #1132 from peastman/tileorder
Minor optimization on AMD GPUs
parents
8a76aa72
c400a83b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
1 deletion
+17
-1
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+17
-1
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
b9b2b7a3
...
@@ -180,6 +180,22 @@ void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exc
...
@@ -180,6 +180,22 @@ void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exc
}
}
static
bool
compareUshort2
(
mm_ushort2
a
,
mm_ushort2
b
)
{
static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
    // Comparator used on devices with a SIMD width of 32 or less.  Tiles are
    // ordered by their y component first and by their x component second,
    // which sorts them to improve cache efficiency.
    if (a.y < b.y)
        return true;
    if (a.y == b.y)
        return (a.x < b.x);
    return false;
}
static bool compareUshort2LargeSIMD(mm_ushort2 a, mm_ushort2 b) {
    // This version is used on devices with SIMD width greater than 32.  It puts
    // diagonal tiles (x == y) before off-diagonal ones to reduce thread divergence.
    //
    // Returns true when tile a must be ordered strictly before tile b.
    const bool aIsDiagonal = (a.x == a.y);
    const bool bIsDiagonal = (b.x == b.y);

    // Exactly one of the two is diagonal: the diagonal tile always goes first.
    if (aIsDiagonal != bIsDiagonal)
        return aIsDiagonal;

    // Both diagonal: x equals y for each tile, so comparing x alone is sufficient.
    if (aIsDiagonal)
        return (a.x < b.x);

    // Both off-diagonal: order by y, then by x.
    return ((a.y < b.y) || (a.y == b.y && a.x < b.x));
}
}
...
@@ -212,7 +228,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -212,7 +228,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
vector
<
mm_ushort2
>
exclusionTilesVec
;
vector
<
mm_ushort2
>
exclusionTilesVec
;
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
exclusionTilesVec
.
push_back
(
mm_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
exclusionTilesVec
.
push_back
(
mm_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
co
mpareUshort2
);
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
co
ntext
.
getSIMDWidth
()
<=
32
?
compareUshort2
:
compareUshort2LargeSIMD
);
exclusionTiles
=
OpenCLArray
::
create
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
=
OpenCLArray
::
create
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment