Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
27f8c64b
Commit
27f8c64b
authored
May 04, 2023
by
rocking
Browse files
Add comment
parent
e8ded1e7
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
5 deletions
+13
-5
include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp
...tion/gpu/device/impl/device_normalization_splitk_impl.hpp
+12
-5
include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp
.../grid/normalization/gridwise_normalization_splitk_1st.hpp
+1
-0
No files found.
include/ck/tensor_operation/gpu/device/impl/device_normalization_splitk_impl.hpp
View file @
27f8c64b
...
@@ -330,7 +330,12 @@ struct DeviceNormalizationSplitKImpl : public DeviceNormalization<XDataType,
...
@@ -330,7 +330,12 @@ struct DeviceNormalizationSplitKImpl : public DeviceNormalization<XDataType,
kGridSize_
=
math
::
integer_divide_ceil
(
KRaw_
,
K_BlockTileSize
*
numBlockTileIteration_
);
kGridSize_
=
math
::
integer_divide_ceil
(
KRaw_
,
K_BlockTileSize
*
numBlockTileIteration_
);
gridSize_
=
math
::
integer_divide_ceil
(
MRaw_
,
M_BlockTileSize
)
*
kGridSize_
;
gridSize_
=
math
::
integer_divide_ceil
(
MRaw_
,
M_BlockTileSize
)
*
kGridSize_
;
numMeanVarCountIteration_
=
math
::
integer_divide_ceil
(
kGridSize_
,
KThreadClusterSize
);
// We do not use vector load for mean, var and count
static
constexpr
index_t
K_MeanVarCountBlockTileSize
=
KThreadClusterSize
;
numMeanVarCountIteration_
=
math
::
integer_divide_ceil
(
kGridSize_
,
K_MeanVarCountBlockTileSize
);
x_grid_desc_m_k_
=
x_grid_desc_m_k_
=
MakeSrc2dDescriptor
(
Lengths_
,
xStrides_
,
kGridSize_
,
numBlockTileIteration_
);
MakeSrc2dDescriptor
(
Lengths_
,
xStrides_
,
kGridSize_
,
numBlockTileIteration_
);
...
@@ -347,12 +352,14 @@ struct DeviceNormalizationSplitKImpl : public DeviceNormalization<XDataType,
...
@@ -347,12 +352,14 @@ struct DeviceNormalizationSplitKImpl : public DeviceNormalization<XDataType,
kGridSize_
);
kGridSize_
);
kernel2_mean_var_grid_desc_m_kblock_
=
kernel2_mean_var_grid_desc_m_kblock_
=
MakeMeanVarDescriptor_M_K
<
Sequence
<
true
,
true
>
,
M_BlockTileSize
,
K_BlockTileSize
>
(
MakeMeanVarDescriptor_M_K
<
Sequence
<
true
,
true
>
,
MRaw_
,
kGridSize_
);
M_BlockTileSize
,
K_MeanVarCountBlockTileSize
>
(
MRaw_
,
kGridSize_
);
kernel2_count_grid_desc_m_kblock_
=
kernel2_count_grid_desc_m_kblock_
=
MakeCountDescriptor_M_K
<
Sequence
<
true
,
true
>
,
M_BlockTileSize
,
K_BlockTileSize
>
(
MakeCountDescriptor_M_K
<
Sequence
<
true
,
true
>
,
MRaw_
,
kGridSize_
);
M_BlockTileSize
,
K_MeanVarCountBlockTileSize
>
(
MRaw_
,
kGridSize_
);
}
}
ComputeDataType
epsilon_
;
ComputeDataType
epsilon_
;
...
...
include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp
View file @
27f8c64b
...
@@ -224,6 +224,7 @@ struct GridwiseNormalizationSplitK1st
...
@@ -224,6 +224,7 @@ struct GridwiseNormalizationSplitK1st
int
count
=
threadwise_welford
.
cur_count_
;
int
count
=
threadwise_welford
.
cur_count_
;
BlockwiseWelford
::
Run
(
mean_thread_buf
(
I
),
var_thread_buf
(
I
),
count
);
BlockwiseWelford
::
Run
(
mean_thread_buf
(
I
),
var_thread_buf
(
I
),
count
);
// The value of count is same for all I
if
constexpr
(
I
==
MThreadSliceSize
-
1
)
if
constexpr
(
I
==
MThreadSliceSize
-
1
)
welford_count
=
count
;
welford_count
=
count
;
});
});
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment