Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
cc50b687
Commit
cc50b687
authored
Jun 20, 2022
by
Anthony Chang
Browse files
format
parent
ab8e0f28
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
5 deletions
+9
-5
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp
...tion/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp
+5
-4
include/ck/utility/reduction_operator.hpp
include/ck/utility/reduction_operator.hpp
+4
-1
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp
View file @
cc50b687
...
@@ -932,8 +932,10 @@ struct GridwiseGemmLayernorm_k0mk1_k0nk1_mn_xdl_cshuffle_v1
...
@@ -932,8 +932,10 @@ struct GridwiseGemmLayernorm_k0mk1_k0nk1_mn_xdl_cshuffle_v1
reduce
::
SquaredAdd
,
reduce
::
SquaredAdd
,
false
>
;
false
>
;
const
auto
d0_zeroVal
=
ThreadwiseReduceD0
::
Op
::
template
GetIdentityValue
<
FloatReduceAcc
>();
const
auto
d0_zeroVal
=
const
auto
d1_zeroVal
=
ThreadwiseReduceD1
::
Op
::
template
GetIdentityValue
<
FloatReduceAcc
>();
ThreadwiseReduceD0
::
Op
::
template
GetIdentityValue
<
FloatReduceAcc
>();
const
auto
d1_zeroVal
=
ThreadwiseReduceD1
::
Op
::
template
GetIdentityValue
<
FloatReduceAcc
>();
static_for
<
0
,
mreduce_per_thread
,
1
>
{}(
static_for
<
0
,
mreduce_per_thread
,
1
>
{}(
[
&
](
auto
i
)
{
d0_thread_buf
(
i
)
=
d0_zeroVal
;
});
[
&
](
auto
i
)
{
d0_thread_buf
(
i
)
=
d0_zeroVal
;
});
static_for
<
0
,
mreduce_per_thread
,
1
>
{}(
static_for
<
0
,
mreduce_per_thread
,
1
>
{}(
...
@@ -984,8 +986,7 @@ struct GridwiseGemmLayernorm_k0mk1_k0nk1_mn_xdl_cshuffle_v1
...
@@ -984,8 +986,7 @@ struct GridwiseGemmLayernorm_k0mk1_k0nk1_mn_xdl_cshuffle_v1
FloatReduceAcc
numerator
=
c_reduce_thread_buf
(
dst_offset
)
-
avg_sum
;
FloatReduceAcc
numerator
=
c_reduce_thread_buf
(
dst_offset
)
-
avg_sum
;
FloatReduceAcc
divisor
=
epsilon
+
avg_squared_sum
-
avg_sum
*
avg_sum
;
FloatReduceAcc
divisor
=
epsilon
+
avg_squared_sum
-
avg_sum
*
avg_sum
;
FloatReduceAcc
divisor_sqrt
;
FloatReduceAcc
divisor_sqrt
;
tensor_operation
::
element_wise
::
UnarySqrt
{}(
tensor_operation
::
element_wise
::
UnarySqrt
{}(
divisor_sqrt
,
divisor
);
divisor_sqrt
,
divisor
);
c_reduce_thread_buf
(
dst_offset
)
=
numerator
/
divisor_sqrt
;
c_reduce_thread_buf
(
dst_offset
)
=
numerator
/
divisor_sqrt
;
});
});
...
...
include/ck/utility/reduction_operator.hpp
View file @
cc50b687
...
@@ -84,7 +84,10 @@ struct Add
...
@@ -84,7 +84,10 @@ struct Add
struct
SquaredAdd
struct
SquaredAdd
{
{
template
<
class
T
>
template
<
class
T
>
__host__
__device__
static
constexpr
T
GetIdentityValue
()
{
return
type_convert
<
T
>
(
0.0
f
);
};
__host__
__device__
static
constexpr
T
GetIdentityValue
()
{
return
type_convert
<
T
>
(
0.0
f
);
};
__host__
__device__
static
constexpr
bool
__host__
__device__
static
constexpr
bool
IsCompatibleInMemoryDataOperation
(
InMemoryDataOperationEnum
operation
)
IsCompatibleInMemoryDataOperation
(
InMemoryDataOperationEnum
operation
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment