Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c400a83b
Commit
c400a83b
authored
Sep 11, 2015
by
peastman
Browse files
Minor optimization on AMD GPUs
parent
12e8a362
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
1 deletion
+17
-1
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+17
-1
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
c400a83b
...
@@ -180,6 +180,22 @@ void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exc
...
@@ -180,6 +180,22 @@ void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exc
}
}
static
bool
compareUshort2
(
mm_ushort2
a
,
mm_ushort2
b
)
{
static bool compareUshort2(mm_ushort2 a, mm_ushort2 b) {
    // Comparator used on devices with a SIMD width of 32 or less.  Tiles are ordered
    // by their y component first and by their x component second; sorting them this
    // way is meant to improve cache efficiency.
    if (a.y != b.y)
        return a.y < b.y;
    // Same y: fall back to comparing x.
    return a.x < b.x;
}
static bool compareUshort2LargeSIMD(mm_ushort2 a, mm_ushort2 b) {
    // This version is used on devices with SIMD width greater than 32.  It puts diagonal
    // tiles (those with x == y) before off-diagonal ones to reduce thread divergence.
    //
    // Returns true when a must be ordered before b:
    //   - a diagonal tile always precedes an off-diagonal tile;
    //   - two diagonal tiles are ordered by x;
    //   - two off-diagonal tiles are ordered by y, then by x.
    const bool aIsDiagonal = (a.x == a.y);
    const bool bIsDiagonal = (b.x == b.y);
    if (aIsDiagonal != bIsDiagonal)
        return aIsDiagonal;
    if (aIsDiagonal)
        return (a.x < b.x);
    return ((a.y < b.y) || (a.y == b.y && a.x < b.x));
}
}
...
@@ -212,7 +228,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -212,7 +228,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
vector
<
mm_ushort2
>
exclusionTilesVec
;
vector
<
mm_ushort2
>
exclusionTilesVec
;
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
exclusionTilesVec
.
push_back
(
mm_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
exclusionTilesVec
.
push_back
(
mm_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
co
mpareUshort2
);
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
co
ntext
.
getSIMDWidth
()
<=
32
?
compareUshort2
:
compareUshort2LargeSIMD
);
exclusionTiles
=
OpenCLArray
::
create
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
=
OpenCLArray
::
create
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment