Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
3b04798c
"vscode:/vscode.git/clone" did not exist on "4cfb259f02135f6e5b5a8f965833f6a5ab7672d9"
Commit
3b04798c
authored
Aug 28, 2018
by
Paul
Browse files
Formatting
parent
fbcb4570
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
19 deletions
+20
-19
src/include/migraph/generate.hpp
src/include/migraph/generate.hpp
+3
-3
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
+16
-15
test/gpu/miopen.cpp
test/gpu/miopen.cpp
+1
-1
No files found.
src/include/migraph/generate.hpp
View file @
3b04798c
...
...
@@ -12,9 +12,9 @@ constexpr T normalize(unsigned long z)
{
if
(
z
==
0
)
return
0
;
const
auto
max
=
32768
;
const
double
range
=
max
/
2
;
double
result
=
(
z
%
max
)
/
range
;
const
auto
max
=
32768
;
const
double
range
=
max
/
2
;
double
result
=
(
z
%
max
)
/
range
;
result
-=
1
;
return
result
;
}
...
...
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp
View file @
3b04798c
...
...
@@ -27,7 +27,7 @@ template <class T>
vec4
<
T
>
vec4_load
(
T
*
x
,
size_t
i
)
{
vec4
<
T
>
result
;
auto
n
=
i
*
4
;
auto
n
=
i
*
4
;
result
[
0
]
=
x
[
n
+
0
];
result
[
1
]
=
x
[
n
+
1
];
result
[
2
]
=
x
[
n
+
2
];
...
...
@@ -85,9 +85,9 @@ inline auto binary_broadcast(argument result, argument arg1, argument arg2)
auto
*
yp
=
input2
.
data
();
auto
*
outp
=
output
.
data
();
const
std
::
size_t
nlocal
=
1024
;
const
std
::
size_t
nglobal
=
256
*
nlocal
;
const
std
::
size_t
n
=
output
.
size
();
const
std
::
size_t
nlocal
=
1024
;
const
std
::
size_t
nglobal
=
256
*
nlocal
;
const
std
::
size_t
n
=
output
.
size
();
launch
(
nglobal
,
nlocal
)([
=
](
auto
idx
)
__device__
{
__shared__
type
buffer
[
2048
];
...
...
@@ -100,10 +100,10 @@ inline auto binary_broadcast(argument result, argument arg1, argument arg2)
// Process the data
for
(
size_t
i
=
idx
.
global
;
i
<
n
;
i
+=
nglobal
)
{
auto
bidx
=
i
%
bdim_len
;
auto
b
=
buffer
[
bidx
];
type
x
=
xp
[
i
];
outp
[
i
]
=
f
(
x
,
b
);
auto
bidx
=
i
%
bdim_len
;
auto
b
=
buffer
[
bidx
];
type
x
=
xp
[
i
];
outp
[
i
]
=
f
(
x
,
b
);
}
});
#else
...
...
@@ -131,7 +131,7 @@ inline auto binary_broadcast(argument result, argument arg1, argument arg2)
{
buffer
[
bdim_vec_len
][
i
]
=
yp
[
bdim_vec_len
][
i
];
}
for
(
size_t
i
=
idx
.
local
;
i
<
(
vec_size
-
bdim_vec_rem
);
i
+=
nlocal
)
for
(
size_t
i
=
idx
.
local
;
i
<
(
vec_size
-
bdim_vec_rem
);
i
+=
nlocal
)
{
buffer
[
bdim_vec_len
][
i
]
=
yp
[
0
][
i
];
}
...
...
@@ -224,15 +224,16 @@ inline auto nary(argument result, argument arg1, argument arg2)
arg2
.
get_shape
().
strides
().
end
(),
[](
auto
x
)
{
return
x
!=
0
;
})
==
1
)
{
auto
not_zero
=
[](
auto
x
)
{
return
x
!=
0
;
};
auto
not_zero
=
[](
auto
x
)
{
return
x
!=
0
;
};
const
auto
&
strides
=
arg2
.
get_shape
().
strides
();
auto
stride_it
=
std
::
find_if
(
strides
.
begin
(),
strides
.
end
(),
not_zero
);
auto
stride_idx
=
std
::
distance
(
strides
.
begin
(),
stride_it
);
auto
stride_len
=
arg2
.
get_shape
().
lens
()[
stride_idx
];
auto
stride_it
=
std
::
find_if
(
strides
.
begin
(),
strides
.
end
(),
not_zero
);
auto
stride_idx
=
std
::
distance
(
strides
.
begin
(),
stride_it
);
auto
stride_len
=
arg2
.
get_shape
().
lens
()[
stride_idx
];
// TODO: Don't require divisibility by 4
bool
divisible_by_4
=
(
stride_len
%
4
==
0
)
and
(
arg1
.
get_shape
().
elements
()
%
4
==
0
);
if
(
divisible_by_4
and
stride_len
<=
2048
and
std
::
none_of
(
std
::
next
(
stride_it
),
strides
.
end
(),
not_zero
))
{
if
(
divisible_by_4
and
stride_len
<=
2048
and
std
::
none_of
(
std
::
next
(
stride_it
),
strides
.
end
(),
not_zero
))
{
binary_broadcast
(
result
,
arg1
,
arg2
)(
f
);
return
;
}
...
...
test/gpu/miopen.cpp
View file @
3b04798c
...
...
@@ -77,7 +77,7 @@ struct auto_print
};
std
::
array
<
std
::
function
<
void
()
>
,
2
>
auto_print
::
handlers
=
{};
template
<
class
T
>
template
<
class
T
>
auto
get_hash
(
const
T
&
x
)
{
return
std
::
hash
<
T
>
{}(
x
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment