Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f744524e
Commit
f744524e
authored
Mar 18, 2021
by
root
Browse files
tuned
parent
f289221b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
11 deletions
+11
-11
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
...nel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
+3
-3
driver/include/device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
...convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
+5
-5
driver/src/conv_driver.cpp
driver/src/conv_driver.cpp
+3
-3
No files found.
composable_kernel/include/tensor_operation/gridwise_dynamic_gemm_v2.hpp
View file @
f744524e
...
@@ -650,8 +650,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
...
@@ -650,8 +650,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
BlockSize
,
BlockSize
,
InMemoryDataOperation
::
Set
,
InMemoryDataOperation
::
Set
,
Sequence
<
CYX
,
K
>
,
Sequence
<
CYX
,
K
>
,
Sequence
<
9
,
1
>
,
//
ABlockTransferThreadSliceLengths_K_M,
ABlockTransferThreadSliceLengths_K_M
,
Sequence
<
4
,
16
>
,
//
ABlockTransferThreadClusterLengths_K_M,
ABlockTransferThreadClusterLengths_K_M
,
ABlockTransferThreadClusterArrangeOrder
,
ABlockTransferThreadClusterArrangeOrder
,
Float
,
Float
,
Float
,
Float
,
...
@@ -841,7 +841,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
...
@@ -841,7 +841,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
Sequence
<
KPerThread
,
1
,
HPerThread
,
WPerThread
>
,
Sequence
<
KPerThread
,
1
,
HPerThread
,
WPerThread
>
,
Sequence
<
3
,
2
,
0
,
1
>
,
// CThreadTransferSrcDstAccessOrder
Sequence
<
3
,
2
,
0
,
1
>
,
// CThreadTransferSrcDstAccessOrder
3
,
// CThreadTransferSrcDstVectorDim
3
,
// CThreadTransferSrcDstVectorDim
1
,
//
CThreadTransferDstScalarPerVector,
CThreadTransferDstScalarPerVector
,
AddressSpace
::
Vgpr
,
AddressSpace
::
Vgpr
,
AddressSpace
::
Global
,
AddressSpace
::
Global
,
CGlobalMemoryDataOperation
,
CGlobalMemoryDataOperation
,
...
...
driver/include/device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw.hpp
View file @
f744524e
...
@@ -73,15 +73,15 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
...
@@ -73,15 +73,15 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
constexpr
index_t
KPerBlock
=
16
;
constexpr
index_t
KPerBlock
=
16
;
constexpr
index_t
HPerBlock
=
16
;
constexpr
index_t
HPerBlock
=
16
;
constexpr
index_t
WPerBlock
=
16
;
constexpr
index_t
WPerBlock
=
16
;
constexpr
index_t
CYXPerBlock
=
4
;
constexpr
index_t
CYXPerBlock
=
2
*
3
*
3
;
constexpr
index_t
KPerThread
=
4
;
constexpr
index_t
KPerThread
=
4
;
constexpr
index_t
HPerThread
=
2
;
constexpr
index_t
HPerThread
=
2
;
constexpr
index_t
WPerThread
=
2
;
constexpr
index_t
WPerThread
=
2
;
constexpr
index_t
CYXPerThread
=
4
;
constexpr
index_t
CYXPerThread
=
2
;
using
GemmABlockTransferThreadSliceLengths_GemmK_GemmM
=
Sequence
<
1
,
1
>
;
using
GemmABlockTransferThreadSliceLengths_GemmK_GemmM
=
Sequence
<
4
,
1
>
;
using
GemmABlockTransferThreadClusterLengths_GemmK_GemmM
=
Sequence
<
4
,
16
>
;
using
GemmABlockTransferThreadClusterLengths_GemmK_GemmM
=
Sequence
<
9
,
16
>
;
constexpr
index_t
GemmABlockTransferSrcScalarPerVector_GemmK
=
1
;
constexpr
index_t
GemmABlockTransferSrcScalarPerVector_GemmK
=
1
;
constexpr
index_t
GemmABlockTransferDstScalarPerVector_GemmM
=
1
;
constexpr
index_t
GemmABlockTransferDstScalarPerVector_GemmM
=
1
;
...
@@ -89,7 +89,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
...
@@ -89,7 +89,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
constexpr
index_t
GemmBBlockTransferSrcScalarPerVector_GemmN
=
1
;
constexpr
index_t
GemmBBlockTransferSrcScalarPerVector_GemmN
=
1
;
constexpr
index_t
GemmBBlockTransferDstScalarPerVector_GemmN
=
1
;
constexpr
index_t
GemmBBlockTransferDstScalarPerVector_GemmN
=
1
;
constexpr
index_t
GemmCThreadTransferDstScalarPerVector_GemmN1
=
1
;
constexpr
index_t
GemmCThreadTransferDstScalarPerVector_GemmN1
=
2
;
constexpr
auto
conv_driver
=
constexpr
auto
conv_driver
=
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
<
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
<
...
...
driver/src/conv_driver.cpp
View file @
f744524e
...
@@ -49,8 +49,8 @@ int main(int argc, char* argv[])
...
@@ -49,8 +49,8 @@ int main(int argc, char* argv[])
using
ConvStrides
=
Sequence
<
1
,
1
>
;
using
ConvStrides
=
Sequence
<
1
,
1
>
;
using
ConvDilations
=
Sequence
<
1
,
1
>
;
using
ConvDilations
=
Sequence
<
1
,
1
>
;
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
#elif 0
#elif 0
constexpr
index_t
N
=
1
;
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
C
=
16
;
...
@@ -706,7 +706,7 @@ int main(int argc, char* argv[])
...
@@ -706,7 +706,7 @@ int main(int argc, char* argv[])
LeftPads
{},
LeftPads
{},
RightPads
{},
RightPads
{},
nrepeat
);
nrepeat
);
#elif
1
#elif
0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw
<
in_data_t
,
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw
<
in_data_t
,
in_vector_size
,
in_vector_size
,
acc_data_t
,
acc_data_t
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment