"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "28f1b92ae244fe667de1bb2ee2fca39afad13fb7"
Commit fd30df23 authored by Chao Liu's avatar Chao Liu
Browse files

tweak

parent f74cf520
...@@ -74,6 +74,34 @@ void device_convolution_backward_weight_implicit_gemm_v4r4r2_xdlops_nchw_kcyx_nk ...@@ -74,6 +74,34 @@ void device_convolution_backward_weight_implicit_gemm_v4r4r2_xdlops_nchw_kcyx_nk
constexpr index_t GemmBBlockTransferSrcScalarPerVector_GemmN = 1; constexpr index_t GemmBBlockTransferSrcScalarPerVector_GemmN = 1;
constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmK1 = 8; constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmK1 = 8;
constexpr index_t GemmCThreadTransferDstScalarPerVector = 1;
#elif 1
// [M, N, K0, K1] = [256, 128, 4, 8] for fp16
constexpr index_t BlockSize = 256;
constexpr index_t GemmMPerBlock = 256;
constexpr index_t GemmNPerBlock = 128;
constexpr index_t GemmKPerBlock = 4;
constexpr index_t GemmMPerWave = 32;
constexpr index_t GemmNPerWave = 32;
constexpr index_t GemmK1 = 8;
constexpr index_t MRepeat = 4;
constexpr index_t NRepeat = 2;
using GemmABlockTransferThreadSliceLengths_GemmK0_GemmM_GemmK1 = Sequence<1, 4, 8>;
using GemmABlockTransferThreadClusterLengths_GemmK0_GemmM_GemmK1 = Sequence<4, 64, 1>;
// A-block transfer uses vector loads of width 4, so the config's wo*ho must be a multiple of 4
constexpr index_t GemmABlockTransferSrcScalarPerVector_GemmK1 = 4;
constexpr index_t GemmABlockTransferDstScalarPerVector_GemmK1 = 4;
using GemmBBlockTransferThreadSliceLengths_GemmK0_GemmN_GemmK1 = Sequence<1, 2, 8>;
using GemmBBlockTransferThreadClusterLengths_GemmK0_GemmN_GemmK1 = Sequence<4, 64, 1>;
constexpr index_t GemmBBlockTransferSrcScalarPerVector_GemmN = 1;
constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmK1 = 8;
constexpr index_t GemmCThreadTransferDstScalarPerVector = 1; constexpr index_t GemmCThreadTransferDstScalarPerVector = 1;
#endif #endif
......
...@@ -95,8 +95,8 @@ int main(int argc, char* argv[]) ...@@ -95,8 +95,8 @@ int main(int argc, char* argv[])
constexpr auto Y = Number<3>{}; constexpr auto Y = Number<3>{};
constexpr auto X = Number<3>{}; constexpr auto X = Number<3>{};
constexpr auto conv_stride_h = I2; constexpr auto conv_stride_h = I1;
constexpr auto conv_stride_w = I2; constexpr auto conv_stride_w = I1;
constexpr auto conv_dilation_h = I1; constexpr auto conv_dilation_h = I1;
constexpr auto conv_dilation_w = I1; constexpr auto conv_dilation_w = I1;
constexpr auto in_left_pad_h = I1; constexpr auto in_left_pad_h = I1;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment