Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
6bdb0fcb
Commit
6bdb0fcb
authored
Nov 04, 2024
by
dummycoderfe
Browse files
use fast rcpf (replace floating-point division with multiplication by __builtin_amdgcn_rcpf)
parent
9a681c7d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
4 additions
and
4 deletions
+4
-4
include/ck_tile/ops/layernorm2d/pipeline/layernorm2d_fwd_pipeline_one_pass.hpp
...ayernorm2d/pipeline/layernorm2d_fwd_pipeline_one_pass.hpp
+1
-1
include/ck_tile/ops/welford/block/block_welford.hpp
include/ck_tile/ops/welford/block/block_welford.hpp
+1
-1
include/ck_tile/ops/welford/thread/thread_welford.hpp
include/ck_tile/ops/welford/thread/thread_welford.hpp
+2
-2
No files found.
include/ck_tile/ops/layernorm2d/pipeline/layernorm2d_fwd_pipeline_one_pass.hpp
View file @
6bdb0fcb
...
@@ -90,7 +90,7 @@ struct Layernorm2dFwdPipelineOnePass
...
@@ -90,7 +90,7 @@ struct Layernorm2dFwdPipelineOnePass
// compute inv-std
// compute inv-std
auto
inv_std
=
tile_elementwise_in
(
auto
inv_std
=
tile_elementwise_in
(
[
&
](
const
auto
&
v_
)
{
[
&
](
const
auto
&
v_
)
{
return
type_convert
<
ComputeDataType
>
(
1.0
f
)
/
(
sqrt
(
v_
+
epsilon
));
return
type_convert
<
ComputeDataType
>
(
1.0
f
)
*
__builtin_amdgcn_rcpf
(
sqrt
(
v_
+
epsilon
));
},
},
var
);
var
);
...
...
include/ck_tile/ops/welford/block/block_welford.hpp
View file @
6bdb0fcb
...
@@ -356,7 +356,7 @@ CK_TILE_DEVICE constexpr void block_tile_welford_post_scale_var(VarDistributedTe
...
@@ -356,7 +356,7 @@ CK_TILE_DEVICE constexpr void block_tile_welford_post_scale_var(VarDistributedTe
int
count
)
int
count
)
{
{
using
DataType
=
typename
VarDistributedTensor_
::
DataType
;
using
DataType
=
typename
VarDistributedTensor_
::
DataType
;
tile_elementwise_inout
([
&
count
](
auto
&
x
)
{
x
=
x
/
type_convert
<
DataType
>
(
count
);
},
tile_elementwise_inout
([
&
count
](
auto
&
x
)
{
x
=
x
*
__builtin_amdgcn_rcpf
(
type_convert
<
DataType
>
(
count
)
)
;
},
var_tensor
);
var_tensor
);
}
}
}
// namespace ck_tile
}
// namespace ck_tile
include/ck_tile/ops/welford/thread/thread_welford.hpp
View file @
6bdb0fcb
...
@@ -12,7 +12,7 @@ CK_TILE_DEVICE void welford_update(T& mean, T& var, T x, int count)
...
@@ -12,7 +12,7 @@ CK_TILE_DEVICE void welford_update(T& mean, T& var, T x, int count)
{
{
// TODO: check nan? maybe no
// TODO: check nan? maybe no
T
delta
=
x
-
mean
;
T
delta
=
x
-
mean
;
mean
+=
delta
/
count
;
mean
+=
delta
*
__builtin_amdgcn_rcpf
(
count
)
;
T
delta2
=
x
-
mean
;
T
delta2
=
x
-
mean
;
var
+=
delta
*
delta2
;
var
+=
delta
*
delta2
;
}
}
...
@@ -25,7 +25,7 @@ welford_merge(T& mean_a, T& var_a, int& count_a, T mean_b, T var_b, int count_b)
...
@@ -25,7 +25,7 @@ welford_merge(T& mean_a, T& var_a, int& count_a, T mean_b, T var_b, int count_b)
T
count_
=
type_convert
<
T
>
(
count
);
T
count_
=
type_convert
<
T
>
(
count
);
T
count_a_
=
type_convert
<
T
>
(
count_a
);
T
count_a_
=
type_convert
<
T
>
(
count_a
);
T
count_b_
=
type_convert
<
T
>
(
count_b
);
T
count_b_
=
type_convert
<
T
>
(
count_b
);
T
count_b_over_count
=
count
==
0
?
type_convert
<
T
>
(
0
)
:
count_b_
/
count_
;
T
count_b_over_count
=
count
==
0
?
type_convert
<
T
>
(
0
)
:
count_b_
*
__builtin_amdgcn_rcpf
(
count_
)
;
T
delta
=
mean_b
-
mean_a
;
T
delta
=
mean_b
-
mean_a
;
mean_a
+=
delta
*
count_b_over_count
;
mean_a
+=
delta
*
count_b_over_count
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment