Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
b426b99a
Commit
b426b99a
authored
Feb 05, 2025
by
Jiming Ruan
Browse files
remove unnecessary change
parent
485e530b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
4 deletions
+6
-4
include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_two_pass.hpp
...ps/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_two_pass.hpp
+6
-4
No files found.
include/ck_tile/ops/rmsnorm2d/pipeline/rmsnorm2d_fwd_pipeline_two_pass.hpp
View file @
b426b99a
...
@@ -125,7 +125,7 @@ struct Rmsnorm2dFwdPipelineTwoPass
...
@@ -125,7 +125,7 @@ struct Rmsnorm2dFwdPipelineTwoPass
// compute inv-rms
// compute inv-rms
auto
inv_rms
=
tile_elementwise_in
(
auto
inv_rms
=
tile_elementwise_in
(
[
&
](
const
auto
&
v_
)
{
[
&
](
const
auto
&
v_
)
{
return
r
sqrt
f
(
v_
/
row_size
+
epsilon
);
return
type_convert
<
ComputeDataType
>
(
1.0
f
)
/
(
sqrt
(
v_
/
row_size
+
epsilon
)
)
;
},
},
square_sum
);
square_sum
);
...
@@ -151,7 +151,8 @@ struct Rmsnorm2dFwdPipelineTwoPass
...
@@ -151,7 +151,8 @@ struct Rmsnorm2dFwdPipelineTwoPass
// rmsnorm computation
// rmsnorm computation
for
(
int
iN
=
__builtin_amdgcn_readfirstlane
(
0
);
iN
<
num_n_tile_iteration
;
++
iN
)
for
(
int
iN
=
__builtin_amdgcn_readfirstlane
(
0
);
iN
<
num_n_tile_iteration
;
++
iN
)
{
{
auto
acc
=
make_static_distributed_tensor
<
ComputeDataType
>
(
decltype
(
load_tile
(
x_window
))
::
get_tile_distribution
());
auto
acc
=
make_static_distributed_tensor
<
ComputeDataType
>
(
decltype
(
load_tile
(
x_window
))
::
get_tile_distribution
());
if
constexpr
(
kFusedAdd
==
Rmsnorm2dFusedAddEnum
::
PRE_ADD_STORE
)
if
constexpr
(
kFusedAdd
==
Rmsnorm2dFusedAddEnum
::
PRE_ADD_STORE
)
{
{
...
@@ -178,7 +179,8 @@ struct Rmsnorm2dFwdPipelineTwoPass
...
@@ -178,7 +179,8 @@ struct Rmsnorm2dFwdPipelineTwoPass
const
auto
gamma
=
load_tile
(
gamma_window
);
const
auto
gamma
=
load_tile
(
gamma_window
);
// rmsnorm computation
// rmsnorm computation
auto
rmsn
=
make_static_distributed_tensor
<
ComputeDataType
>
(
decltype
(
load_tile
(
x_window
))
::
get_tile_distribution
());
auto
rmsn
=
make_static_distributed_tensor
<
ComputeDataType
>
(
decltype
(
load_tile
(
x_window
))
::
get_tile_distribution
());
sweep_tile
(
rmsn
,
[
&
,
inv_rms_
=
inv_rms
](
auto
idx
)
{
sweep_tile
(
rmsn
,
[
&
,
inv_rms_
=
inv_rms
](
auto
idx
)
{
constexpr
auto
i_idx
=
make_tuple
(
idx
[
number
<
0
>
{}]);
constexpr
auto
i_idx
=
make_tuple
(
idx
[
number
<
0
>
{}]);
constexpr
auto
j_idx
=
make_tuple
(
idx
[
number
<
1
>
{}]);
constexpr
auto
j_idx
=
make_tuple
(
idx
[
number
<
1
>
{}]);
...
@@ -192,7 +194,7 @@ struct Rmsnorm2dFwdPipelineTwoPass
...
@@ -192,7 +194,7 @@ struct Rmsnorm2dFwdPipelineTwoPass
static_assert
(
kFusedQuant
==
Rmsnorm2dFusedQuantEnum
::
NO_SWEEP
);
static_assert
(
kFusedQuant
==
Rmsnorm2dFusedQuantEnum
::
NO_SWEEP
);
Epilogue
{}(
y_window
,
rmsn
);
Epilogue
{}(
y_window
,
rmsn
);
move_tile_window
(
gamma_window
,
{
-
Block_N
});
move_tile_window
(
gamma_window
,
{
-
Block_N
});
move_tile_window
(
y_window
,
{
0
,
-
Block_N
});
move_tile_window
(
y_window
,
{
0
,
-
Block_N
});
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment