Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
b16fad32
"...text-generation-inference.git" did not exist on "6e3220529df5906ae586031873b7865e9923040b"
Commit
b16fad32
authored
Jan 20, 2025
by
Jiming Ruan
Browse files
Add support to non-var in Welford alg
remove static
parent
64d5c4d6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
12 deletions
+43
-12
include/ck_tile/ops/norm_reduce/thread/thread_welford.hpp
include/ck_tile/ops/norm_reduce/thread/thread_welford.hpp
+43
-12
No files found.
include/ck_tile/ops/norm_reduce/thread/thread_welford.hpp
View file @
b16fad32
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
5
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
...
@@ -8,7 +8,7 @@
...
@@ -8,7 +8,7 @@
namespace
ck_tile
{
namespace
ck_tile
{
template
<
typename
T
,
bool
kFastFDiv
=
false
>
template
<
typename
T
,
bool
kFastFDiv
=
false
>
CK_TILE_DEVICE
void
welford_update
(
T
&
mean
,
T
&
var
,
T
x
,
int
count
,
bool_constant
<
kFastFDiv
>
=
{})
CK_TILE_DEVICE
T
welford_update
_impl
(
T
&
mean
,
T
x
,
int
count
,
bool_constant
<
kFastFDiv
>
=
{})
{
{
// TODO: check nan? maybe no
// TODO: check nan? maybe no
T
delta
=
x
-
mean
;
T
delta
=
x
-
mean
;
...
@@ -20,25 +20,33 @@ CK_TILE_DEVICE void welford_update(T& mean, T& var, T x, int count, bool_constan
...
@@ -20,25 +20,33 @@ CK_TILE_DEVICE void welford_update(T& mean, T& var, T x, int count, bool_constan
{
{
mean
+=
delta
/
count
;
mean
+=
delta
/
count
;
}
}
return
delta
;
}
/// Welford running-mean update, mean-only overload (no variance accumulator).
///
/// Delegates to welford_update_impl, which adjusts `mean` in place, and
/// discards the delta that helper hands back.
///
/// @tparam T          value type of the running mean and of the sample
/// @tparam kFastFDiv  compile-time flag forwarded unchanged to the helper
/// @param mean   running mean, updated in place
/// @param x      new sample to fold into the mean
/// @param count  sample count forwarded to the helper
template <typename T, bool kFastFDiv = false>
CK_TILE_DEVICE void
welford_update(T& mean, T x, int count, bool_constant<kFastFDiv> = {})
{
    // Only the side effect on `mean` is wanted; the returned delta is unused here.
    static_cast<void>(welford_update_impl(mean, x, count, constant<kFastFDiv>{}));
}
/// Welford update of a running mean together with its variance accumulator.
///
/// The mean is updated by welford_update_impl, which also returns the
/// difference between the sample and the mean as it was *before* the update.
/// That difference is multiplied by the difference between the sample and the
/// *updated* mean, and the product is added to `var`.
///
/// @tparam T          value type of the statistics and of the sample
/// @tparam kFastFDiv  compile-time flag forwarded unchanged to the helper
/// @param mean   running mean, updated in place
/// @param var    variance accumulator, updated in place (this function never
///               divides it by the count)
/// @param x      new sample
/// @param count  sample count forwarded to the helper
template <typename T, bool kFastFDiv = false>
CK_TILE_DEVICE void
welford_update(T& mean, T& var, T x, int count, bool_constant<kFastFDiv> = {})
{
    // x - mean, taken against the mean before this sample was folded in.
    const T delta = welford_update_impl(mean, x, count, constant<kFastFDiv>{});

    // x - mean, taken against the mean after the update above.
    const T delta2 = x - mean;

    // Accumulate exactly once per sample.
    var += delta * delta2;
}
template
<
typename
T
,
bool
kFastFDiv
=
false
>
template
<
typename
T
,
bool
kFastFDiv
=
false
>
CK_TILE_DEVICE
static
void
welford_merge
(
T
&
mean_a
,
CK_TILE_DEVICE
auto
T
&
var_a
,
welford_merge_impl
(
T
&
mean_a
,
int
&
count_a
,
T
mean_b
,
int
count_b
,
bool_constant
<
kFastFDiv
>
=
{})
int
&
count_a
,
T
mean_b
,
T
var_b
,
int
count_b
,
bool_constant
<
kFastFDiv
>
=
{})
{
{
int
count
=
count_a
+
count_b
;
int
count
=
count_a
+
count_b
;
T
count_
=
type_convert
<
T
>
(
count
);
T
count_
=
type_convert
<
T
>
(
count
);
T
count_a_
=
type_convert
<
T
>
(
count_a
);
T
count_b_
=
type_convert
<
T
>
(
count_b
);
T
count_b_
=
type_convert
<
T
>
(
count_b
);
T
count_b_over_count
;
T
count_b_over_count
;
if
(
kFastFDiv
&&
std
::
is_same_v
<
T
,
float
>
)
if
constexpr
(
kFastFDiv
&&
std
::
is_same_v
<
T
,
float
>
)
{
{
count_b_over_count
=
count_b_over_count
=
count
==
0
?
type_convert
<
T
>
(
0
)
:
count_b_
*
__builtin_amdgcn_rcpf
(
count_
);
count
==
0
?
type_convert
<
T
>
(
0
)
:
count_b_
*
__builtin_amdgcn_rcpf
(
count_
);
...
@@ -50,8 +58,31 @@ CK_TILE_DEVICE static void welford_merge(T& mean_a,
...
@@ -50,8 +58,31 @@ CK_TILE_DEVICE static void welford_merge(T& mean_a,
T
delta
=
mean_b
-
mean_a
;
T
delta
=
mean_b
-
mean_a
;
mean_a
+=
delta
*
count_b_over_count
;
mean_a
+=
delta
*
count_b_over_count
;
var_a
+=
var_b
+
delta
*
delta
*
count_a_
*
count_b_over_count
;
count_a
=
count
;
count_a
=
count
;
return
make_tuple
(
delta
,
count_b_over_count
);
}
/// Merge two partial running means (mean-only overload, no variance).
///
/// Delegates to welford_merge_impl, which updates `mean_a` and `count_a` in
/// place. The (delta, count_b_over_count) tuple returned by the helper is not
/// needed when no variance is tracked, so it is dropped and nothing is
/// returned. The return type is spelled `void` (what the former `auto`
/// deduced to) so the signature matches the mean-only welford_update overload.
///
/// @tparam T          value type of the means
/// @tparam kFastFDiv  compile-time flag forwarded unchanged to the helper
/// @param mean_a   mean of partition A; receives the merged mean
/// @param count_a  sample count of partition A; receives count_a + count_b
/// @param mean_b   mean of partition B
/// @param count_b  sample count of partition B
template <typename T, bool kFastFDiv = false>
CK_TILE_DEVICE void
welford_merge(T& mean_a, int& count_a, T mean_b, int count_b, bool_constant<kFastFDiv> = {})
{
    welford_merge_impl(mean_a, count_a, mean_b, count_b, constant<kFastFDiv>{});
}
/// Merge two partial (mean, variance-accumulator, count) triples.
///
/// welford_merge_impl merges the means and counts in place and returns the
/// pair (mean_b - mean_a, count_b_over_count). The variance accumulator of A
/// then receives B's accumulator plus the cross term
/// delta^2 * count_a * count_b_over_count, where count_a is the value from
/// *before* the merge.
///
/// @tparam T          value type of the statistics
/// @tparam kFastFDiv  compile-time flag forwarded unchanged to the helper
/// @param mean_a   mean of partition A; receives the merged mean
/// @param var_a    variance accumulator of A; receives the merged accumulator
/// @param count_a  sample count of A; receives count_a + count_b
/// @param mean_b   mean of partition B
/// @param var_b    variance accumulator of partition B
/// @param count_b  sample count of partition B
template <typename T, bool kFastFDiv = false>
CK_TILE_DEVICE void welford_merge(T& mean_a,
                                  T& var_a,
                                  int& count_a,
                                  T mean_b,
                                  T var_b,
                                  int count_b,
                                  bool_constant<kFastFDiv> = {})
{
    // Must be captured first: the helper overwrites count_a with the merged count.
    const T prev_count_a = type_convert<T>(count_a);

    const auto [mean_diff, b_fraction] =
        welford_merge_impl(mean_a, count_a, mean_b, count_b, constant<kFastFDiv>{});

    // Cross term contributed by the gap between the two partition means.
    const T cross_term = mean_diff * mean_diff * prev_count_a * b_fraction;
    var_a += var_b + cross_term;
}
}
}
// namespace ck_tile
}
// namespace ck_tile
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment