Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
702412b1
"docs/en_US/QuickStart.md" did not exist on "a656bba5161b32080d2dc71c2ff331f34e183485"
Commit
702412b1
authored
Feb 28, 2022
by
Shucai Xiao
Browse files
refine contiguous gpu implementation
parent
562724bf
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
5 deletions
+26
-5
src/targets/gpu/device/contiguous.cpp
src/targets/gpu/device/contiguous.cpp
+26
-5
No files found.
src/targets/gpu/device/contiguous.cpp
View file @
702412b1
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <hip/hip_fp16.h>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -21,11 +22,31 @@ void contiguous_nonstandard(hipStream_t stream, const argument& result, const ar
...
@@ -21,11 +22,31 @@ void contiguous_nonstandard(hipStream_t stream, const argument& result, const ar
// Copy the contents of `arg` into `result`, index by index, for the packed
// case (the same linear index is used on both sides).
//
// stream : HIP stream the copy kernel is launched on.
// result : destination argument; its shape supplies the element count and
//          the element type used to pick the copy path.
// arg    : source argument.
//
// When the element type is half, elements are moved two at a time through
// __half2 pointers so each work item copies a pair; a single trailing element
// (odd element count) is copied through the scalar pointers. Every other
// type is copied one element per work item.
//
// NOTE(review): this assumes gs_launch(stream, n) invokes the callback once
// for each index in [0, n) -- confirm against its definition.
// NOTE(review): the __half2 path reinterprets the half buffers as __half2;
// this presumably requires both data pointers to be suitably aligned for
// __half2 -- confirm for arguments that are offset views into a larger buffer.
void contiguous_packed(hipStream_t stream, const argument& result, const argument& arg)
{
    const index_int nelements = result.get_shape().elements();
    if(result.get_shape().type() == shape::half_type)
    {
        // Number of full pairs, plus one extra work item for an odd tail.
        // Sizing the launch this way (instead of nelements / 2 with the tail
        // handled by work item 0) keeps the tail copy alive when
        // nelements == 1, where nelements / 2 would launch no work at all.
        const index_int npairs = nelements / 2;
        const index_int nitems = npairs + (nelements % 2);
        visit_all(result, arg)([&](auto output_v, auto input_v) {
            const auto* input = device_cast(input_v.data());
            auto* output      = device_cast(output_v.data());
            // Keep the source pointer const through the cast.
            const __half2* input2 = reinterpret_cast<const __half2*>(input_v.data());
            __half2* output2      = reinterpret_cast<__half2*>(output_v.data());
            gs_launch(stream, nitems)([=](auto i) __device__ {
                if(i < npairs)
                {
                    output2[i] = input2[i];
                }
                else
                {
                    // Only reached by the single extra work item when
                    // nelements is odd: copy the unpaired last element.
                    output[nelements - 1] = input[nelements - 1];
                }
            });
        });
    }
    else
    {
        visit_all(result, arg)([&](auto output_v, auto input_v) {
            const auto* input = device_cast(input_v.data());
            auto* output      = device_cast(output_v.data());
            gs_launch(stream, nelements)([=](auto i) __device__ { output[i] = input[i]; });
        });
    }
}
void
contiguous
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
void
contiguous
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment