Commit 7802381d authored by Jing Zhang
Browse files

tuning

parent e6a23d8b
......@@ -315,7 +315,7 @@ struct GridwiseGemmDlops_km_kn_mn_v3
BBlockTransferSrcAccessOrder,
BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector,
1,
BThreadTransferSrcResetCoordinateAfterRun,
true>(b_e0_e1_n_ho_wo_e2_global_desc,
make_multi_index(0, 0, 0, ho_thread_data_on_global, wo_thread_data_on_global, 0));
......
......@@ -64,8 +64,6 @@ struct ThreadwiseGemmDlops_km_kn_mn_v3
constexpr auto K = AThreadDesc_E1_K_E2{}.GetLength(I1);
constexpr auto E2 = AThreadDesc_E1_K_E2{}.GetLength(I2);
static_assert(E1 == 4 && E2 == 4, "");
constexpr auto H = BThreadDesc_E1_N_Ho_Wo_E2{}.GetLength(I2);
constexpr auto W = BThreadDesc_E1_N_Ho_Wo_E2{}.GetLength(I3);
......
......@@ -103,26 +103,26 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nchw_kcyx_nkhw(
constexpr index_t KPerBlock = 16;
constexpr index_t HoPerBlock = 8;
constexpr index_t WoPerBlock = 8;
constexpr index_t WoPerBlock = 32;
constexpr index_t E1 = 4;
constexpr index_t E1 = 4 * 9;
constexpr index_t E2 = 4;
constexpr index_t EPerBlock = 4;
constexpr index_t KPerThread = 4;
constexpr index_t KPerThread = KPerBlock;
constexpr index_t HoPerThread = 2;
constexpr index_t WoPerThread = 2;
constexpr index_t EPerThread = 4;
constexpr index_t EPerThread = 1;
using ABlockTransferThreadSliceLengths_E0_E1_K_E2 = Sequence<1, 1, 1, 4>;
using ABlockTransferThreadSliceLengths_E0_E1_K_E2 = Sequence<1, 9, 1, E2>;
using ABlockTransferThreadClusterLengths_E0_E1_K_E2 = Sequence<1, 4, 16, 1>;
constexpr index_t ABlockTransferSrcScalarPerVector_E2 = 1;
constexpr index_t ABlockTransferDstScalarPerVector_E2 = 1;
constexpr index_t ABlockTransferSrcScalarPerVector_E2 = E2;
constexpr index_t ABlockTransferDstScalarPerVector_E2 = E2;
constexpr index_t BThreadTransferSrcScalarPerVector_E2 = 1;
constexpr index_t BThreadTransferSrcScalarPerVector_E2 = E2;
constexpr index_t CThreadTransferDstScalarPerVector_K = 1;
constexpr index_t CThreadTransferDstScalarPerVector_K = 8;
#endif
constexpr auto conv_driver =
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment