Commit f744524e authored by root
Browse files

tuned

parent f289221b
......@@ -650,8 +650,8 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
BlockSize,
InMemoryDataOperation::Set,
Sequence<CYX, K>,
Sequence<9, 1>, // ABlockTransferThreadSliceLengths_K_M,
Sequence<4, 16>, // ABlockTransferThreadClusterLengths_K_M,
ABlockTransferThreadSliceLengths_K_M,
ABlockTransferThreadClusterLengths_K_M,
ABlockTransferThreadClusterArrangeOrder,
Float,
Float,
......@@ -841,7 +841,7 @@ struct GridwiseDynamicGemm_km_kn_mn_v3
Sequence<KPerThread, 1, HPerThread, WPerThread>,
Sequence<3, 2, 0, 1>, // CThreadTransferSrcDstAccessOrder
3, // CThreadTransferSrcDstVectorDim
1, // CThreadTransferDstScalarPerVector,
CThreadTransferDstScalarPerVector,
AddressSpace::Vgpr,
AddressSpace::Global,
CGlobalMemoryDataOperation,
......
......@@ -73,15 +73,15 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
constexpr index_t KPerBlock = 16;
constexpr index_t HPerBlock = 16;
constexpr index_t WPerBlock = 16;
constexpr index_t CYXPerBlock = 4;
constexpr index_t CYXPerBlock = 2 * 3 * 3;
constexpr index_t KPerThread = 4;
constexpr index_t HPerThread = 2;
constexpr index_t WPerThread = 2;
constexpr index_t CYXPerThread = 4;
constexpr index_t CYXPerThread = 2;
using GemmABlockTransferThreadSliceLengths_GemmK_GemmM = Sequence<1, 1>;
using GemmABlockTransferThreadClusterLengths_GemmK_GemmM = Sequence<4, 16>;
using GemmABlockTransferThreadSliceLengths_GemmK_GemmM = Sequence<4, 1>;
using GemmABlockTransferThreadClusterLengths_GemmK_GemmM = Sequence<9, 16>;
constexpr index_t GemmABlockTransferSrcScalarPerVector_GemmK = 1;
constexpr index_t GemmABlockTransferDstScalarPerVector_GemmM = 1;
......@@ -89,7 +89,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v5r1_nchw_kcyx_nkhw(InDesc
constexpr index_t GemmBBlockTransferSrcScalarPerVector_GemmN = 1;
constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmN = 1;
constexpr index_t GemmCThreadTransferDstScalarPerVector_GemmN1 = 1;
constexpr index_t GemmCThreadTransferDstScalarPerVector_GemmN1 = 2;
constexpr auto conv_driver =
DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad<
......
......@@ -706,7 +706,7 @@ int main(int argc, char* argv[])
LeftPads{},
RightPads{},
nrepeat);
#elif 1
#elif 0
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t,
in_vector_size,
acc_data_t,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment