Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f1403dac
"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "e95b7f2ea3aa5c482ebaad79314dbb8116bc041c"
Commit
f1403dac
authored
Mar 18, 2021
by
root
Browse files
tuning parameters
parent
649dbac0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
21 deletions
+19
-21
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
...convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
+0
-1
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
...nel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
+19
-20
No files found.
composable_kernel/include/driver/driver_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
View file @
f1403dac
...
@@ -190,7 +190,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
...
@@ -190,7 +190,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
GemmABlockTransferDstScalarPerVector_GemmM
,
GemmABlockTransferDstScalarPerVector_GemmM
,
false
,
// don't move back src coordinate after threadwise copy
false
,
// don't move back src coordinate after threadwise copy
Sequence
<
3
,
2
,
1
,
0
>
,
Sequence
<
3
,
2
,
1
,
0
>
,
Sequence
<
3
,
2
,
1
,
0
>
,
3
,
3
,
GemmBBlockTransferSrcScalarPerVector_GemmN
,
GemmBBlockTransferSrcScalarPerVector_GemmN
,
GemmBBlockTransferDstScalarPerVector_GemmN
,
GemmBBlockTransferDstScalarPerVector_GemmN
,
...
...
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
View file @
f1403dac
...
@@ -34,7 +34,6 @@ template <index_t BlockSize,
...
@@ -34,7 +34,6 @@ template <index_t BlockSize,
index_t
ABlockTransferSrcScalarPerVector
,
index_t
ABlockTransferSrcScalarPerVector
,
index_t
ABlockTransferDstScalarPerVector_M
,
index_t
ABlockTransferDstScalarPerVector_M
,
bool
AThreadTransferSrcResetCoordinateAfterRun
,
bool
AThreadTransferSrcResetCoordinateAfterRun
,
typename
BBlockTransferThreadClusterArrangeOrder
,
typename
BBlockTransferSrcAccessOrder
,
typename
BBlockTransferSrcAccessOrder
,
index_t
BBlockTransferSrcVectorDim
,
index_t
BBlockTransferSrcVectorDim
,
index_t
BBlockTransferSrcScalarPerVector
,
index_t
BBlockTransferSrcScalarPerVector
,
...
@@ -195,21 +194,22 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
...
@@ -195,21 +194,22 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
Number
<
EPerBlock
>
{},
Number
<
1
>
{},
Number
<
HPerThread
>
{},
Number
<
WPerThread
>
{}));
Number
<
EPerBlock
>
{},
Number
<
1
>
{},
Number
<
HPerThread
>
{},
Number
<
WPerThread
>
{}));
auto
b_threadwise_transfer
=
ThreadwiseDynamicTensorSliceTransfer_v2
<
auto
b_threadwise_transfer
=
Float
,
ThreadwiseDynamicTensorSliceTransfer_v2
<
Float
,
Float
,
Float
,
decltype
(
b_e_n_h_w_global_desc
),
decltype
(
b_e_n_h_w_global_desc
),
decltype
(
b_e_n_h_w_thread_desc
),
decltype
(
b_e_n_h_w_thread_desc
),
Sequence
<
EPerBlock
,
1
,
HPerThread
,
WPerThread
>
,
Sequence
<
EPerBlock
,
1
,
HPerThread
,
WPerThread
>
,
Sequence
<
3
,
2
,
0
,
1
>
,
// BBlockTransferSrcAccessOrder,
BBlockTransferSrcAccessOrder
,
3
,
// BBlockTransferSrcVectorDim,
BBlockTransferSrcVectorDim
,
1
,
// BBlockTransferSrcScalarPerVector,
BBlockTransferSrcScalarPerVector
,
AddressSpace
::
Global
,
AddressSpace
::
Global
,
AddressSpace
::
Vgpr
,
AddressSpace
::
Vgpr
,
InMemoryDataOperation
::
Set
,
InMemoryDataOperation
::
Set
,
1
,
1
,
true
>
(
b_e_n_h_w_global_desc
,
true
>
(
make_multi_index
(
0
,
0
,
h_thread_data_on_global
,
w_thread_data_on_global
));
b_e_n_h_w_global_desc
,
make_multi_index
(
0
,
0
,
h_thread_data_on_global
,
w_thread_data_on_global
));
// LDS allocation for A and B: be careful of alignment
// LDS allocation for A and B: be careful of alignment
constexpr
auto
a_block_space_size
=
constexpr
auto
a_block_space_size
=
...
@@ -387,9 +387,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
...
@@ -387,9 +387,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
decltype
(
c_k_n_h_w_thread_desc
),
decltype
(
c_k_n_h_w_thread_desc
),
decltype
(
c_k_n_h_w_global_desc
),
decltype
(
c_k_n_h_w_global_desc
),
Sequence
<
KPerThread
,
1
,
HPerThread
,
WPerThread
>
,
Sequence
<
KPerThread
,
1
,
HPerThread
,
WPerThread
>
,
Sequence
<
3
,
2
,
0
,
1
>
,
//
CThreadTransferSrcDstAccessOrder
CThreadTransferSrcDstAccessOrder
,
3
,
//
CThreadTransferSrcDstVectorDim
CThreadTransferSrcDstVectorDim
,
1
,
//
CThreadTransferDstScalarPerVector,
CThreadTransferDstScalarPerVector
,
AddressSpace
::
Vgpr
,
AddressSpace
::
Vgpr
,
AddressSpace
::
Global
,
AddressSpace
::
Global
,
CGlobalMemoryDataOperation
,
CGlobalMemoryDataOperation
,
...
@@ -510,7 +510,6 @@ template <index_t BlockSize,
...
@@ -510,7 +510,6 @@ template <index_t BlockSize,
index_t
ABlockTransferSrcScalarPerVector
,
index_t
ABlockTransferSrcScalarPerVector
,
index_t
ABlockTransferDstScalarPerVector_M
,
index_t
ABlockTransferDstScalarPerVector_M
,
bool
AThreadTransferSrcResetCoordinateAfterRun
,
bool
AThreadTransferSrcResetCoordinateAfterRun
,
typename
BBlockTransferThreadClusterArrangeOrder
,
typename
BBlockTransferSrcAccessOrder
,
typename
BBlockTransferSrcAccessOrder
,
index_t
BBlockTransferSrcVectorDim
,
index_t
BBlockTransferSrcVectorDim
,
index_t
BBlockTransferSrcScalarPerVector
,
index_t
BBlockTransferSrcScalarPerVector
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment