"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "e95b7f2ea3aa5c482ebaad79314dbb8116bc041c"
Commit f1403dac authored by root
Browse files

tuning parameters

parent 649dbac0
...@@ -190,7 +190,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad ...@@ -190,7 +190,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
GemmABlockTransferDstScalarPerVector_GemmM, GemmABlockTransferDstScalarPerVector_GemmM,
false, // don't move back src coordinate after threadwise copy false, // don't move back src coordinate after threadwise copy
Sequence<3, 2, 1, 0>, Sequence<3, 2, 1, 0>,
Sequence<3, 2, 1, 0>,
3, 3,
GemmBBlockTransferSrcScalarPerVector_GemmN, GemmBBlockTransferSrcScalarPerVector_GemmN,
GemmBBlockTransferDstScalarPerVector_GemmN, GemmBBlockTransferDstScalarPerVector_GemmN,
......
...@@ -34,7 +34,6 @@ template <index_t BlockSize, ...@@ -34,7 +34,6 @@ template <index_t BlockSize,
index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferSrcScalarPerVector,
index_t ABlockTransferDstScalarPerVector_M, index_t ABlockTransferDstScalarPerVector_M,
bool AThreadTransferSrcResetCoordinateAfterRun, bool AThreadTransferSrcResetCoordinateAfterRun,
typename BBlockTransferThreadClusterArrangeOrder,
typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcAccessOrder,
index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcVectorDim,
index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector,
...@@ -195,21 +194,22 @@ struct GridwiseDynamicGemm_km_kn_mn_v2 ...@@ -195,21 +194,22 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple( make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(
Number<EPerBlock>{}, Number<1>{}, Number<HPerThread>{}, Number<WPerThread>{})); Number<EPerBlock>{}, Number<1>{}, Number<HPerThread>{}, Number<WPerThread>{}));
auto b_threadwise_transfer = ThreadwiseDynamicTensorSliceTransfer_v2< auto b_threadwise_transfer =
Float, ThreadwiseDynamicTensorSliceTransfer_v2<Float,
Float, Float,
decltype(b_e_n_h_w_global_desc), decltype(b_e_n_h_w_global_desc),
decltype(b_e_n_h_w_thread_desc), decltype(b_e_n_h_w_thread_desc),
Sequence<EPerBlock, 1, HPerThread, WPerThread>, Sequence<EPerBlock, 1, HPerThread, WPerThread>,
Sequence<3, 2, 0, 1>, // BBlockTransferSrcAccessOrder, BBlockTransferSrcAccessOrder,
3, // BBlockTransferSrcVectorDim, BBlockTransferSrcVectorDim,
1, // BBlockTransferSrcScalarPerVector, BBlockTransferSrcScalarPerVector,
AddressSpace::Global, AddressSpace::Global,
AddressSpace::Vgpr, AddressSpace::Vgpr,
InMemoryDataOperation::Set, InMemoryDataOperation::Set,
1, 1,
true>(b_e_n_h_w_global_desc, true>(
make_multi_index(0, 0, h_thread_data_on_global, w_thread_data_on_global)); b_e_n_h_w_global_desc,
make_multi_index(0, 0, h_thread_data_on_global, w_thread_data_on_global));
// LDS allocation for A and B: be careful of alignment // LDS allocation for A and B: be careful of alignment
constexpr auto a_block_space_size = constexpr auto a_block_space_size =
...@@ -387,9 +387,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v2 ...@@ -387,9 +387,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
decltype(c_k_n_h_w_thread_desc), decltype(c_k_n_h_w_thread_desc),
decltype(c_k_n_h_w_global_desc), decltype(c_k_n_h_w_global_desc),
Sequence<KPerThread, 1, HPerThread, WPerThread>, Sequence<KPerThread, 1, HPerThread, WPerThread>,
Sequence<3, 2, 0, 1>, // CThreadTransferSrcDstAccessOrder CThreadTransferSrcDstAccessOrder,
3, // CThreadTransferSrcDstVectorDim CThreadTransferSrcDstVectorDim,
1, // CThreadTransferDstScalarPerVector, CThreadTransferDstScalarPerVector,
AddressSpace::Vgpr, AddressSpace::Vgpr,
AddressSpace::Global, AddressSpace::Global,
CGlobalMemoryDataOperation, CGlobalMemoryDataOperation,
...@@ -510,7 +510,6 @@ template <index_t BlockSize, ...@@ -510,7 +510,6 @@ template <index_t BlockSize,
index_t ABlockTransferSrcScalarPerVector, index_t ABlockTransferSrcScalarPerVector,
index_t ABlockTransferDstScalarPerVector_M, index_t ABlockTransferDstScalarPerVector_M,
bool AThreadTransferSrcResetCoordinateAfterRun, bool AThreadTransferSrcResetCoordinateAfterRun,
typename BBlockTransferThreadClusterArrangeOrder,
typename BBlockTransferSrcAccessOrder, typename BBlockTransferSrcAccessOrder,
index_t BBlockTransferSrcVectorDim, index_t BBlockTransferSrcVectorDim,
index_t BBlockTransferSrcScalarPerVector, index_t BBlockTransferSrcScalarPerVector,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment