Commit f1403dac authored by root
Browse files

tuning parameters

parent 649dbac0
......@@ -190,7 +190,6 @@ struct DriverDynamicConvolutionForwardImplicitGemm_v5r1_nchw_kcyx_nkhw_pad
GemmABlockTransferDstScalarPerVector_GemmM,
false, // don't move back src coordinate after threadwise copy
Sequence<3, 2, 1, 0>,
Sequence<3, 2, 1, 0>,
3,
GemmBBlockTransferSrcScalarPerVector_GemmN,
GemmBBlockTransferDstScalarPerVector_GemmN,
......
......@@ -34,7 +34,6 @@ template <index_t BlockSize,
index_t ABlockTransferSrcScalarPerVector,
index_t ABlockTransferDstScalarPerVector_M,
bool AThreadTransferSrcResetCoordinateAfterRun,
typename BBlockTransferThreadClusterArrangeOrder,
typename BBlockTransferSrcAccessOrder,
index_t BBlockTransferSrcVectorDim,
index_t BBlockTransferSrcScalarPerVector,
......@@ -195,20 +194,21 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
make_dynamic_naive_tensor_descriptor_packed_v2(make_tuple(
Number<EPerBlock>{}, Number<1>{}, Number<HPerThread>{}, Number<WPerThread>{}));
auto b_threadwise_transfer = ThreadwiseDynamicTensorSliceTransfer_v2<
Float,
auto b_threadwise_transfer =
ThreadwiseDynamicTensorSliceTransfer_v2<Float,
Float,
decltype(b_e_n_h_w_global_desc),
decltype(b_e_n_h_w_thread_desc),
Sequence<EPerBlock, 1, HPerThread, WPerThread>,
Sequence<3, 2, 0, 1>, // BBlockTransferSrcAccessOrder,
3, // BBlockTransferSrcVectorDim,
1, // BBlockTransferSrcScalarPerVector,
BBlockTransferSrcAccessOrder,
BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector,
AddressSpace::Global,
AddressSpace::Vgpr,
InMemoryDataOperation::Set,
1,
true>(b_e_n_h_w_global_desc,
true>(
b_e_n_h_w_global_desc,
make_multi_index(0, 0, h_thread_data_on_global, w_thread_data_on_global));
// LDS allocation for A and B: be careful of alignment
......@@ -387,9 +387,9 @@ struct GridwiseDynamicGemm_km_kn_mn_v2
decltype(c_k_n_h_w_thread_desc),
decltype(c_k_n_h_w_global_desc),
Sequence<KPerThread, 1, HPerThread, WPerThread>,
Sequence<3, 2, 0, 1>, // CThreadTransferSrcDstAccessOrder
3, // CThreadTransferSrcDstVectorDim
1, // CThreadTransferDstScalarPerVector,
CThreadTransferSrcDstAccessOrder,
CThreadTransferSrcDstVectorDim,
CThreadTransferDstScalarPerVector,
AddressSpace::Vgpr,
AddressSpace::Global,
CGlobalMemoryDataOperation,
......@@ -510,7 +510,6 @@ template <index_t BlockSize,
index_t ABlockTransferSrcScalarPerVector,
index_t ABlockTransferDstScalarPerVector_M,
bool AThreadTransferSrcResetCoordinateAfterRun,
typename BBlockTransferThreadClusterArrangeOrder,
typename BBlockTransferSrcAccessOrder,
index_t BBlockTransferSrcVectorDim,
index_t BBlockTransferSrcScalarPerVector,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment