Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
1692db3c
Commit
1692db3c
authored
Aug 27, 2018
by
Paul
Browse files
Vectorized loads
parent
33fe5e12
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
23 additions
and
8 deletions
+23
-8
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp
+1
-1
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
+22
-7
No files found.
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp
View file @
1692db3c
...
@@ -33,7 +33,7 @@ inline auto launch(std::size_t global, std::size_t local)
...
@@ -33,7 +33,7 @@ inline auto launch(std::size_t global, std::size_t local)
};
};
}
}
inline
auto
gs_launch
(
std
::
size_t
n
,
std
::
size_t
local
=
256
)
inline
auto
gs_launch
(
std
::
size_t
n
,
std
::
size_t
local
=
1024
)
{
{
std
::
size_t
groups
=
1
+
n
/
local
;
std
::
size_t
groups
=
1
+
n
/
local
;
std
::
size_t
nglobal
=
std
::
min
<
std
::
size_t
>
(
256
,
groups
)
*
local
;
std
::
size_t
nglobal
=
std
::
min
<
std
::
size_t
>
(
256
,
groups
)
*
local
;
...
...
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
View file @
1692db3c
...
@@ -10,6 +10,15 @@ namespace migraph {
...
@@ -10,6 +10,15 @@ namespace migraph {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
template
<
class
T
>
using
vec4
=
T
__attribute__
((
ext_vector_type
(
4
)));
template
<
class
T
>
vec4
<
T
>*
as_vec4
(
T
*
x
)
{
return
reinterpret_cast
<
vec4
<
T
>*>
(
x
);
}
template
<
class
F
,
class
...
Arguments
>
template
<
class
F
,
class
...
Arguments
>
auto
nary_nonstandard_impl
(
F
f
,
argument
result
,
Arguments
...
args
)
auto
nary_nonstandard_impl
(
F
f
,
argument
result
,
Arguments
...
args
)
{
{
...
@@ -49,13 +58,13 @@ inline auto binary_broadcast(argument result, argument arg1, argument arg2)
...
@@ -49,13 +58,13 @@ inline auto binary_broadcast(argument result, argument arg1, argument arg2)
visit_all
(
result
,
arg1
,
arg2
)([
&
](
auto
output
,
auto
input1
,
auto
input2
)
{
visit_all
(
result
,
arg1
,
arg2
)([
&
](
auto
output
,
auto
input1
,
auto
input2
)
{
using
type
=
std
::
remove_cv_t
<
typename
decltype
(
output
)
::
value_type
>
;
using
type
=
std
::
remove_cv_t
<
typename
decltype
(
output
)
::
value_type
>
;
auto
*
xp
=
input1
.
data
();
auto
*
xp
=
as_vec4
(
input1
.
data
()
)
;
auto
*
yp
=
input2
.
data
();
auto
*
yp
=
input2
.
data
();
auto
*
outp
=
output
.
data
();
auto
*
outp
=
as_vec4
(
output
.
data
()
)
;
const
std
::
size_t
nlocal
=
256
;
const
std
::
size_t
nlocal
=
1024
;
const
std
::
size_t
nglobal
=
256
*
nlocal
;
const
std
::
size_t
nglobal
=
256
*
nlocal
;
const
std
::
size_t
n
=
output
.
size
();
const
std
::
size_t
n
=
output
.
size
()
/
4
;
launch
(
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
launch
(
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
__shared__
type
buffer
[
2048
];
__shared__
type
buffer
[
2048
];
...
@@ -66,9 +75,15 @@ inline auto binary_broadcast(argument result, argument arg1, argument arg2)
...
@@ -66,9 +75,15 @@ inline auto binary_broadcast(argument result, argument arg1, argument arg2)
__syncthreads
();
__syncthreads
();
for
(
size_t
i
=
idx
.
global
;
i
<
n
;
i
+=
nglobal
)
for
(
size_t
i
=
idx
.
global
;
i
<
n
;
i
+=
nglobal
)
{
{
auto
bidx
=
i
%
bdim_len
;
vec4
<
type
>
x
=
xp
[
i
];
auto
b
=
buffer
[
bidx
];
vec4
<
type
>
out
=
outp
[
i
];
outp
[
i
]
=
f
(
xp
[
i
],
b
);
for
(
std
::
size_t
j
=
0
;
j
<
4
;
j
++
)
{
auto
gidx
=
i
*
4
+
j
;
auto
bidx
=
gidx
%
bdim_len
;
auto
b
=
buffer
[
bidx
];
out
[
j
]
=
f
(
x
[
j
],
b
);
}
}
}
});
});
});
});
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment