Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
fb7b4609
Commit
fb7b4609
authored
Sep 10, 2021
by
Jing Zhang
Browse files
debug
parent
90276e6b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
12 additions
and
6 deletions
+12
-6
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp
...ernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp
+2
-0
host/driver_offline/include/device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw.hpp
...ution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw.hpp
+6
-6
host/driver_offline/include/driver_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw_outpad.hpp
...orward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw_outpad.hpp
+4
-0
No files found.
composable_kernel/include/tensor_operation/gridwise_gemm_dlops_v2.hpp
View file @
fb7b4609
...
@@ -181,6 +181,8 @@ struct GridwiseGemmDlops_km_kn_mn_v3
...
@@ -181,6 +181,8 @@ struct GridwiseGemmDlops_km_kn_mn_v3
auto
c_global_buf
=
make_dynamic_buffer
<
AddressSpaceEnum_t
::
Global
>
(
auto
c_global_buf
=
make_dynamic_buffer
<
AddressSpaceEnum_t
::
Global
>
(
p_c_global
,
c_k_n_ho_wo_global_desc
.
GetElementSpaceSize
());
p_c_global
,
c_k_n_ho_wo_global_desc
.
GetElementSpaceSize
());
static_assert
(
E1
%
EPerBlock
==
0
,
""
);
// const auto E = a_e0_e1_k_global_desc.GetLength(I0);
// const auto E = a_e0_e1_k_global_desc.GetLength(I0);
// const auto K = a_e0_e1_k_global_desc.GetLength(I1);
// const auto K = a_e0_e1_k_global_desc.GetLength(I1);
...
...
host/driver_offline/include/device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw.hpp
View file @
fb7b4609
...
@@ -106,17 +106,17 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
...
@@ -106,17 +106,17 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
constexpr
index_t
WoPerBlock
=
8
;
constexpr
index_t
WoPerBlock
=
8
;
constexpr
index_t
E1
=
16
;
constexpr
index_t
E1
=
16
;
constexpr
index_t
EPerBlock
=
16
;
constexpr
index_t
EPerBlock
=
8
;
constexpr
index_t
KPerThread
=
KPerBlock
;
constexpr
index_t
KPerThread
=
KPerBlock
;
constexpr
index_t
HoPerThread
=
1
;
constexpr
index_t
HoPerThread
=
1
;
constexpr
index_t
WoPerThread
=
1
;
constexpr
index_t
WoPerThread
=
1
;
constexpr
index_t
EPerThread
=
EPerBlock
;
constexpr
index_t
EPerThread
=
EPerBlock
;
using
ABlockTransferThreadSliceLengths_E_K
=
Sequence
<
1
,
4
,
1
>
;
using
ABlockTransferThreadSliceLengths_E
0_E1
_K
=
Sequence
<
1
,
4
,
1
>
;
using
ABlockTransferThreadClusterLengths_E_K
=
Sequence
<
1
,
4
,
16
>
;
using
ABlockTransferThreadClusterLengths_E
0_E1
_K
=
Sequence
<
1
,
4
,
16
>
;
constexpr
index_t
ABlockTransferSrcScalarPerVector_E
=
4
;
constexpr
index_t
ABlockTransferSrcScalarPerVector_E
=
1
;
constexpr
index_t
ABlockTransferDstScalarPerVector_K
=
1
;
constexpr
index_t
ABlockTransferDstScalarPerVector_K
=
1
;
constexpr
index_t
BThreadTransferSrcScalarPerVector_E
=
1
;
constexpr
index_t
BThreadTransferSrcScalarPerVector_E
=
1
;
...
@@ -139,8 +139,8 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
...
@@ -139,8 +139,8 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
HoPerThread
,
HoPerThread
,
WoPerThread
,
WoPerThread
,
EPerThread
,
EPerThread
,
ABlockTransferThreadSliceLengths_E_K
,
ABlockTransferThreadSliceLengths_E
0_E1
_K
,
ABlockTransferThreadClusterLengths_E_K
,
ABlockTransferThreadClusterLengths_E
0_E1
_K
,
ABlockTransferSrcScalarPerVector_E
,
ABlockTransferSrcScalarPerVector_E
,
ABlockTransferDstScalarPerVector_K
,
ABlockTransferDstScalarPerVector_K
,
BThreadTransferSrcScalarPerVector_E
,
BThreadTransferSrcScalarPerVector_E
,
...
...
host/driver_offline/include/driver_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw_outpad.hpp
View file @
fb7b4609
...
@@ -256,6 +256,10 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
...
@@ -256,6 +256,10 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nchw_kcyx_nkhw_outp
const
bool
has_double_tail_k_block_loop
=
(
E1
/
EPerBlock
)
%
2
==
0
;
const
bool
has_double_tail_k_block_loop
=
(
E1
/
EPerBlock
)
%
2
==
0
;
std
::
cerr
<<
"has_main_k_block_loop = "
<<
has_main_k_block_loop
<<
" has_double_tail_k_block_loop = "
<<
has_double_tail_k_block_loop
<<
std
::
endl
;
const
auto
c_blockid_to_k_n_ho_wo_block_cluster_adaptor
=
const
auto
c_blockid_to_k_n_ho_wo_block_cluster_adaptor
=
make_single_stage_tensor_adaptor
(
make_tuple
(
make_merge_transform
(
make_tuple
(
I0
,
I0
))),
make_single_stage_tensor_adaptor
(
make_tuple
(
make_merge_transform
(
make_tuple
(
I0
,
I0
))),
make_tuple
(
Sequence
<
0
,
1
>
{}),
make_tuple
(
Sequence
<
0
,
1
>
{}),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment