Commit f744524e authored by root
Browse files

tuned

parent f289221b
...@@ -650,8 +650,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v3 ...@@ -650,8 +650,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
BlockSize, BlockSize,
InMemoryDataOperation::Set, InMemoryDataOperation::Set,
Sequence<CYX, K>, Sequence<CYX, K>,
Sequence<9, 1>, // ABlockTransferThreadSliceLengths_K_M, ABlockTransferThreadSliceLengths_K_M,
Sequence<4, 16>, // ABlockTransferThreadClusterLengths_K_M, ABlockTransferThreadClusterLengths_K_M,
ABlockTransferThreadClusterArrangeOrder, ABlockTransferThreadClusterArrangeOrder,
Float, Float,
Float, Float,
...@@ -841,7 +841,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3 ...@@ -841,7 +841,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
Sequence<KPerThread, 1, HPerThread, WPerThread>, Sequence<KPerThread, 1, HPerThread, WPerThread>,
Sequence<3, 2, 0, 1>, // CThreadTransferSrcDstAccessOrder Sequence<3, 2, 0, 1>, // CThreadTransferSrcDstAccessOrder
3, // CThreadTransferSrcDstVectorDim 3, // CThreadTransferSrcDstVectorDim
1, // CThreadTransferDstScalarPerVector, CThreadTransferDstScalarPerVector,
AddressSpace::Vgpr, AddressSpace::Vgpr,
AddressSpace::Global, AddressSpace::Global,
CGlobalMemoryDataOperation, CGlobalMemoryDataOperation,
......
...@@ -73,15 +73,15 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc ...@@ -73,15 +73,15 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
constexpr index_t KPerBlock = 16; constexpr index_t KPerBlock = 16;
constexpr index_t HPerBlock = 16; constexpr index_t HPerBlock = 16;
constexpr index_t WPerBlock = 16; constexpr index_t WPerBlock = 16;
constexpr index_t CYXPerBlock = 4; constexpr index_t CYXPerBlock = 2 * 3 * 3;
constexpr index_t KPerThread = 4; constexpr index_t KPerThread = 4;
constexpr index_t HPerThread = 2; constexpr index_t HPerThread = 2;
constexpr index_t WPerThread = 2; constexpr index_t WPerThread = 2;
constexpr index_t CYXPerThread = 4; constexpr index_t CYXPerThread = 2;
using GemmABlockTransferThreadSliceLengths_GemmK_GemmM = Sequence<1, 1>; using GemmABlockTransferThreadSliceLengths_GemmK_GemmM = Sequence<4, 1>;
using GemmABlockTransferThreadClusterLengths_GemmK_GemmM = Sequence<4, 16>; using GemmABlockTransferThreadClusterLengths_GemmK_GemmM = Sequence<9, 16>;
constexpr index_t GemmABlockTransferSrcScalarPerVector_GemmK = 1; constexpr index_t GemmABlockTransferSrcScalarPerVector_GemmK = 1;
constexpr index_t GemmABlockTransferDstScalarPerVector_GemmM = 1; constexpr index_t GemmABlockTransferDstScalarPerVector_GemmM = 1;
...@@ -89,7 +89,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc ...@@ -89,7 +89,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
constexpr index_t GemmBBlockTransferSrcScalarPerVector_GemmN = 1; constexpr index_t GemmBBlockTransferSrcScalarPerVector_GemmN = 1;
constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmN = 1; constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmN = 1;
constexpr index_t GemmCThreadTransferDstScalarPerVector_GemmN1 = 1; constexpr index_t GemmCThreadTransferDstScalarPerVector_GemmN1 = 2;
constexpr auto conv_driver = constexpr auto conv_driver =
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad< DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad<
......
...@@ -49,8 +49,8 @@ int main(int argc, char* argv[]) ...@@ -49,8 +49,8 @@ int main(int argc, char* argv[])
using ConvStrides = Sequence<1, 1>; using ConvStrides = Sequence<1, 1>;
using ConvDilations = Sequence<1, 1>; using ConvDilations = Sequence<1, 1>;
using LeftPads = Sequence<0, 0>; using LeftPads = Sequence<0, 0>;
using RightPads = Sequence<0, 0>; using RightPads = Sequence<0, 0>;
#elif 0 #elif 0
constexpr index_t N = 1; constexpr index_t N = 1;
constexpr index_t C = 16; constexpr index_t C = 16;
...@@ -706,7 +706,7 @@ int main(int argc, char* argv[]) ...@@ -706,7 +706,7 @@ int main(int argc, char* argv[])
LeftPads{}, LeftPads{},
RightPads{}, RightPads{},
nrepeat); nrepeat);
#elif 1 #elif 0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t, device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t,
in_vector_size, in_vector_size,
acc_data_t, acc_data_t,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment