Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
50ba9c44
Commit
50ba9c44
authored
Sep 13, 2024
by
carlushuang
Browse files
modify karg
parent
d7e0f7e2
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
10 additions
and
8 deletions
+10
-8
example/ck_tile/19_elementwise/CMakeLists.txt
example/ck_tile/19_elementwise/CMakeLists.txt
+1
-1
example/ck_tile/19_elementwise/elementwise_api.cpp
example/ck_tile/19_elementwise/elementwise_api.cpp
+3
-3
example/ck_tile/19_elementwise/include/ck_tile/ops/elementwise_unary/kernel/elementwise_unary_kernel.hpp
...ops/elementwise_unary/kernel/elementwise_unary_kernel.hpp
+6
-4
No files found.
example/ck_tile/19_elementwise/CMakeLists.txt
View file @
50ba9c44
...
...
@@ -2,4 +2,4 @@
# to be included in "make all/install/check"
add_executable
(
tile_example_elementwise EXCLUDE_FROM_ALL elementwise.cpp elementwise_api.cpp
)
target_include_directories
(
tile_example_elementwise PRIVATE
${
CMAKE_CURRENT_SOURCE_DIR
}
/include
)
target_compile_options
(
tile_example_elementwise PRIVATE -v --save-temps -Wno-gnu-line-marker
)
target_compile_options
(
tile_example_elementwise PRIVATE -v --save-temps -Wno-gnu-line-marker
-mllvm --amdgpu-kernarg-preload-count=16
)
example/ck_tile/19_elementwise/elementwise_api.cpp
View file @
50ba9c44
...
...
@@ -25,7 +25,7 @@ struct Cast
constexpr dim3 blocks = kernel::BlockSize(); \
\
float ave_time = ck_tile::launch_kernel( \
s, ck_tile::make_kernel<blocks.x, 1>(kernel{}, grids, blocks, 0, kargs)); \
s, ck_tile::make_kernel<blocks.x, 1>(kernel{}, grids, blocks, 0, kargs.p_input, kargs.p_output, kargs.num_pixels)); \
return ave_time;
float
elementwise
(
elementwise_trait
t
,
elementwise_kargs
a
,
ck_tile
::
stream_config
s
)
...
...
@@ -35,11 +35,11 @@ float elementwise(elementwise_trait t, elementwise_kargs a, ck_tile::stream_conf
{
if
(
t
.
output_type
==
"fp32"
&&
t
.
input_type
==
"fp16"
)
{
DISPATCH_ELEMENTWISE_CAST
(
float
,
ck_tile
::
fp16_t
,
sizeof
(
ck_tile
::
fp16_t
),
8
)
DISPATCH_ELEMENTWISE_CAST
(
float
,
ck_tile
::
fp16_t
,
8
*
sizeof
(
ck_tile
::
fp16_t
),
8
)
}
else
if
(
t
.
output_type
==
"fp16"
&&
t
.
input_type
==
"fp32"
)
{
DISPATCH_ELEMENTWISE_CAST
(
ck_tile
::
fp16_t
,
float
,
sizeof
(
float
),
8
)
DISPATCH_ELEMENTWISE_CAST
(
ck_tile
::
fp16_t
,
float
,
4
*
sizeof
(
float
),
8
)
}
}
return
rtn
;
...
...
example/ck_tile/19_elementwise/include/ck_tile/ops/elementwise_unary/kernel/elementwise_unary_kernel.hpp
View file @
50ba9c44
...
...
@@ -57,15 +57,17 @@ struct ElementwiseUnaryKernel
CK_TILE_HOST_DEVICE
static
constexpr
auto
BlockSize
()
{
return
Problem
::
BlockSize
;
}
CK_TILE_DEVICE
void
operator
()(
Kargs
kargs
)
const
CK_TILE_DEVICE
void
operator
()(
const
void
*
p_input_
,
void
*
p_output_
,
uint64_t
num_pixels_
)
const
{
uint64_t
block_base
=
static_cast
<
uint64_t
>
(
blockIdx
.
x
)
*
Problem
::
BlockSize
*
Problem
::
VectorSize
;
uint64_t
pixels_rem
=
kargs
.
num_pixels
-
block_base
;
uint64_t
pixels_rem
=
num_pixels_
-
block_base
;
const
auto
input_window
=
[
&
]()
{
const
InputType
*
p_input
=
reinterpret_cast
<
const
InputType
*>
(
kargs
.
p_input
)
+
block_base
;
reinterpret_cast
<
const
InputType
*>
(
p_input_
)
+
block_base
;
auto
tmp
=
make_naive_tensor_view_packed
<
address_space_enum
::
global
>
(
p_input
,
...
...
@@ -78,7 +80,7 @@ struct ElementwiseUnaryKernel
auto
output_window
=
[
&
]()
{
OutputType
*
p_output
=
reinterpret_cast
<
OutputType
*>
(
kargs
.
p_output
)
+
block_base
;
reinterpret_cast
<
OutputType
*>
(
p_output_
)
+
block_base
;
auto
tmp
=
make_naive_tensor_view_packed
<
address_space_enum
::
global
>
(
p_output
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment