Commit 3567bf79 authored by Jing Zhang
Browse files

clean code

parent e8f5ca1a
...@@ -530,9 +530,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1 ...@@ -530,9 +530,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v1
decltype(c_m0_m1_m2_n_thread_desc), decltype(c_m0_m1_m2_n_thread_desc),
decltype(c_m0_m1_m2_n_global_desc), decltype(c_m0_m1_m2_n_global_desc),
Sequence<M0, 1, M2, 1>, Sequence<M0, 1, M2, 1>,
Sequence<0, 1, 2, 3>, // CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstAccessOrder,
3, // CThreadTransferSrcDstVectorDim, CThreadTransferSrcDstVectorDim,
1, // CThreadTransferDstScalarPerVector, CThreadTransferDstScalarPerVector,
CGlobalMemoryDataOperation, CGlobalMemoryDataOperation,
1, 1,
true>{c_m0_m1_m2_n_global_desc, true>{c_m0_m1_m2_n_global_desc,
......
...@@ -445,9 +445,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2 ...@@ -445,9 +445,9 @@ struct GridwiseDynamicGemm_km_kn_m0m1n0n1_xdlops_v2
decltype(c_m0_m1_m2_n_thread_desc), decltype(c_m0_m1_m2_n_thread_desc),
decltype(c_m0_m1_m2_n_global_desc), decltype(c_m0_m1_m2_n_global_desc),
Sequence<M0, 1, M2, 1>, Sequence<M0, 1, M2, 1>,
Sequence<0, 1, 2, 3>, // CThreadTransferSrcDstAccessOrder, CThreadTransferSrcDstAccessOrder,
3, // CThreadTransferSrcDstVectorDim, CThreadTransferSrcDstVectorDim,
1, // CThreadTransferDstScalarPerVector, CThreadTransferDstScalarPerVector,
CGlobalMemoryDataOperation, CGlobalMemoryDataOperation,
1, 1,
true>{c_m0_m1_m2_n_global_desc, true>{c_m0_m1_m2_n_global_desc,
......
...@@ -152,8 +152,12 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw ...@@ -152,8 +152,12 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
for(index_t i = 0; i < 5; ++i) for(index_t i = 0; i < 5; ++i)
{ {
float ave_time = launch_kernel_dynamic_gemm_xdlops_v2< #if 0
BlockSize, float ave_time = launch_kernel_dynamic_gemm_xdlops_v1
#else
float ave_time = launch_kernel_dynamic_gemm_xdlops_v2
#endif
<BlockSize,
TInWei, TInWei,
TAcc, TAcc,
TOut, TOut,
...@@ -185,8 +189,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw ...@@ -185,8 +189,8 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nchw_kcyx_nkhw
1, 1,
GemmBBlockTransferSrcScalarPerVector_GemmN, GemmBBlockTransferSrcScalarPerVector_GemmN,
GemmBBlockTransferDstScalarPerVector_KPack, GemmBBlockTransferDstScalarPerVector_KPack,
false, // don't move back src coordinate after threadwise copy, which will be fused with false, // don't move back src coordinate after threadwise copy, which will be fused
// MoveSrcSliceWindow() to save addr computation // with MoveSrcSliceWindow() to save addr computation
Sequence<2, 3, 0, 1>, Sequence<2, 3, 0, 1>,
3, 3,
GemmCThreadTransferDstScalarPerVector_GemmN1, GemmCThreadTransferDstScalarPerVector_GemmN1,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment