Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
316fcc3f
"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "6131a93b969f87d171148bd367fd9990d5a49b6b"
Commit
316fcc3f
authored
Apr 02, 2021
by
Jing Zhang
Browse files
rename
parent
59328f3a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
20 deletions
+20
-20
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
...convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
+10
-10
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
...tion_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
+10
-10
No files found.
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
View file @
316fcc3f
...
...
@@ -136,13 +136,13 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
}
// hack to control index calculation when iterating over a_k_m_global tensor
constexpr
auto
a_
k_m
_global_iterator_hacks
=
constexpr
auto
a_
e_k
_global_iterator_hacks
=
make_tuple
(
make_tuple
(
Sequence
<
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
>
{}),
make_tuple
(
Sequence
<
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
>
{}));
constexpr
auto
a_
k_m
_global_move_slice_window_iterator_hack
=
Sequence
<
0
,
0
,
0
>
{};
constexpr
auto
a_
e_k
_global_move_slice_window_iterator_hack
=
Sequence
<
0
,
0
,
0
>
{};
constexpr
auto
b_
k
_n_global_iterator_hacks
=
constexpr
auto
b_
e
_n_
ho_wo_
global_iterator_hacks
=
make_tuple
(
make_tuple
(
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
>
{},
...
...
@@ -152,12 +152,12 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
>
{}));
constexpr
auto
b_
k
_n_global_move_slice_window_iterator_hack
=
constexpr
auto
b_
e
_n_
ho_wo_
global_move_slice_window_iterator_hack
=
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
0
,
0
,
0
>
{};
// hack to control index calculation when iterating over c_m0_m1_n0_n1_global tensor
// hack for NKHW format
constexpr
auto
c_k_n_h_w_global_tensor_iterator_hacks
=
constexpr
auto
c_k_n_h
o
_w
o
_global_tensor_iterator_hacks
=
make_tuple
(
make_tuple
(
Sequence
<
0
,
1
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
>
{},
...
...
@@ -202,11 +202,11 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
Sequence
<
0
,
2
,
3
,
1
>
,
0
,
CThreadTransferDstScalarPerVector_W
,
decltype
(
a_
k_m
_global_iterator_hacks
),
decltype
(
b_
k
_n_global_iterator_hacks
),
decltype
(
c_k_n_h_w_global_tensor_iterator_hacks
),
decltype
(
a_
k_m
_global_move_slice_window_iterator_hack
),
decltype
(
b_
k
_n_global_move_slice_window_iterator_hack
)
>
;
decltype
(
a_
e_k
_global_iterator_hacks
),
decltype
(
b_
e
_n_
ho_wo_
global_iterator_hacks
),
decltype
(
c_k_n_h
o
_w
o
_global_tensor_iterator_hacks
),
decltype
(
a_
e_k
_global_move_slice_window_iterator_hack
),
decltype
(
b_
e
_n_
ho_wo_
global_move_slice_window_iterator_hack
)
>
;
const
auto
GridSize
=
(
K
/
KPerBlock
)
*
(
Ho
/
HoPerBlock
)
*
(
Wo
/
WoPerBlock
)
*
N
;
...
...
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw_outpad.hpp
View file @
316fcc3f
...
...
@@ -149,13 +149,13 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
}
// hack to control index calculation when iterating over a_k_m_global tensor
constexpr
auto
a_
k_m
_global_iterator_hacks
=
constexpr
auto
a_
e_k
_global_iterator_hacks
=
make_tuple
(
make_tuple
(
Sequence
<
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
>
{}),
make_tuple
(
Sequence
<
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
>
{}));
constexpr
auto
a_
k_m
_global_move_slice_window_iterator_hack
=
Sequence
<
0
,
0
,
0
>
{};
constexpr
auto
a_
e_k
_global_move_slice_window_iterator_hack
=
Sequence
<
0
,
0
,
0
>
{};
constexpr
auto
b_
k
_n_global_iterator_hacks
=
constexpr
auto
b_
e
_n_
ho_wo_
global_iterator_hacks
=
make_tuple
(
make_tuple
(
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
>
{},
...
...
@@ -165,12 +165,12 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
>
{}));
constexpr
auto
b_
k
_n_global_move_slice_window_iterator_hack
=
constexpr
auto
b_
e
_n_
ho_wo_
global_move_slice_window_iterator_hack
=
Sequence
<
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
0
,
0
,
0
>
{};
// hack to control index calculation when iterating over c_m0_m1_n0_n1_global tensor
// hack for NKHW format
constexpr
auto
c_k_n_h_w_global_tensor_iterator_hacks
=
constexpr
auto
c_k_n_h
o
_w
o
_global_tensor_iterator_hacks
=
make_tuple
(
make_tuple
(
Sequence
<
0
,
1
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
>
{},
Sequence
<
0
,
0
,
0
,
0
,
0
>
{},
...
...
@@ -215,11 +215,11 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_outpad
Sequence
<
0
,
2
,
3
,
1
>
,
0
,
CThreadTransferDstScalarPerVector_W
,
decltype
(
a_
k_m
_global_iterator_hacks
),
decltype
(
b_
k
_n_global_iterator_hacks
),
decltype
(
c_k_n_h_w_global_tensor_iterator_hacks
),
decltype
(
a_
k_m
_global_move_slice_window_iterator_hack
),
decltype
(
b_
k
_n_global_move_slice_window_iterator_hack
)
>
;
decltype
(
a_
e_k
_global_iterator_hacks
),
decltype
(
b_
e
_n_
ho_wo_
global_iterator_hacks
),
decltype
(
c_k_n_h
o
_w
o
_global_tensor_iterator_hacks
),
decltype
(
a_
e_k
_global_move_slice_window_iterator_hack
),
decltype
(
b_
e
_n_
ho_wo_
global_move_slice_window_iterator_hack
)
>
;
const
auto
GridSize
=
(
K
/
KPerBlock
)
*
(
Hop
/
HoPerBlock
)
*
(
Wop
/
WoPerBlock
)
*
N
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment