Commit 1fb77ae6 authored by Jing Zhang
Browse files

fixed incorrect results due to typo

parent 64705e7d
...@@ -111,7 +111,7 @@ void device_convolution_add_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0 ...@@ -111,7 +111,7 @@ void device_convolution_add_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0
constexpr auto WoPerBlock = 32; constexpr auto WoPerBlock = 32;
constexpr auto E1 = C0 * 9; constexpr auto E1 = C0 * 9;
constexpr auto E2 = 1; constexpr auto E2 = C1 / InWeiVectorSize;
constexpr auto K2 = 2; constexpr auto K2 = 2;
constexpr auto E1PerBlock = C0; constexpr auto E1PerBlock = C0;
......
...@@ -104,7 +104,7 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1 ...@@ -104,7 +104,7 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1
constexpr index_t WoPerBlock = 32; constexpr index_t WoPerBlock = 32;
constexpr index_t E1 = C0 * Y * X; constexpr index_t E1 = C0 * Y * X;
constexpr index_t E2 = 1; constexpr index_t E2 = C1 / InWeiVectorSize;
constexpr index_t K2 = 2; constexpr index_t K2 = 2;
constexpr index_t E1PerBlock = C0; constexpr index_t E1PerBlock = C0;
...@@ -126,9 +126,9 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1 ...@@ -126,9 +126,9 @@ void device_convolution_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1_nk0hwk1
#endif #endif
const auto in_n_c0_hi_wi_c1_desc = const auto in_n_c0_hi_wi_c1_desc =
make_naive_tensor_descriptor_packed(make_tuple(N, C0, Hi, Wi, C1)); make_naive_tensor_descriptor_packed(make_tuple(N, C0, Hi, Wi, E2));
const auto wei_k_c0_y_x_c1_desc = const auto wei_k_c0_y_x_c1_desc =
make_naive_tensor_descriptor_packed(make_tuple(K, C0, Y, X, C1)); make_naive_tensor_descriptor_packed(make_tuple(K, C0, Y, X, E2));
const auto out_n_k0_ho_wo_k1_desc = const auto out_n_k0_ho_wo_k1_desc =
make_naive_tensor_descriptor_packed(make_tuple(N, K0, Ho, Wo, K1)); make_naive_tensor_descriptor_packed(make_tuple(N, K0, Ho, Wo, K1));
......
...@@ -113,7 +113,7 @@ void device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1 ...@@ -113,7 +113,7 @@ void device_convolution_maxpool_forward_implicit_gemm_v5r1_dlops_nc0hwc1_kc0yxc1
constexpr auto WoPerBlock = 32; constexpr auto WoPerBlock = 32;
constexpr auto E1 = C0 * 9; constexpr auto E1 = C0 * 9;
constexpr auto E2 = 1; constexpr auto E2 = C1 / InWeiVectorSize;
constexpr auto K2 = 2; constexpr auto K2 = 2;
constexpr auto E1PerBlock = C0; constexpr auto E1PerBlock = C0;
......
...@@ -128,7 +128,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -128,7 +128,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
// input tensor // input tensor
const auto in_n_c0_hip_wip_e2_global_desc = transform_tensor_descriptor( const auto in_n_c0_hip_wip_e2_global_desc = transform_tensor_descriptor(
make_naive_tensor_descriptor_packed(make_tuple(K, C0, Hi, Wi, E2)), make_naive_tensor_descriptor_packed(make_tuple(N, C0, Hi, Wi, E2)),
make_tuple(make_pass_through_transform(N), make_tuple(make_pass_through_transform(N),
make_pass_through_transform(C0), make_pass_through_transform(C0),
make_pad_transform(Hi, InLeftPadH, InRightPadH), make_pad_transform(Hi, InLeftPadH, InRightPadH),
...@@ -149,7 +149,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -149,7 +149,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
make_tuple( make_tuple(
Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}, Sequence<6>{})); Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}, Sequence<6>{}));
const auto b_e_n_ho_wo_e2_grid_desc = transform_tensor_descriptor( const auto in_e_n_ho_wo_e2_grid_desc = transform_tensor_descriptor(
in_n_c0_y_ho_x_wo_e2_global_desc, in_n_c0_y_ho_x_wo_e2_global_desc,
make_tuple(make_merge_transform(make_tuple(C0, Y, X)), make_tuple(make_merge_transform(make_tuple(C0, Y, X)),
make_pass_through_transform(N), make_pass_through_transform(N),
...@@ -161,7 +161,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -161,7 +161,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{})); make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{}));
const auto b_e0_e1_n_ho_wo_e2_grid_desc = transform_tensor_descriptor( const auto b_e0_e1_n_ho_wo_e2_grid_desc = transform_tensor_descriptor(
b_e_n_ho_wo_e2_grid_desc, in_e_n_ho_wo_e2_grid_desc,
make_tuple(make_unmerge_transform(make_tuple(E0, E1)), make_tuple(make_unmerge_transform(make_tuple(E0, E1)),
make_pass_through_transform(N), make_pass_through_transform(N),
make_pass_through_transform(Hop), make_pass_through_transform(Hop),
......
...@@ -123,7 +123,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -123,7 +123,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
// input tensor // input tensor
const auto in_n_c0_hip_wip_e2_global_desc = transform_tensor_descriptor( const auto in_n_c0_hip_wip_e2_global_desc = transform_tensor_descriptor(
make_naive_tensor_descriptor_packed(make_tuple(K, C0, Hi, Wi, E2)), make_naive_tensor_descriptor_packed(make_tuple(N, C0, Hi, Wi, E2)),
make_tuple(make_pass_through_transform(N), make_tuple(make_pass_through_transform(N),
make_pass_through_transform(C0), make_pass_through_transform(C0),
make_pad_transform(Hi, InLeftPadH, InRightPadH), make_pad_transform(Hi, InLeftPadH, InRightPadH),
...@@ -144,7 +144,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -144,7 +144,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
make_tuple( make_tuple(
Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}, Sequence<6>{})); Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}, Sequence<6>{}));
const auto b_e_n_ho_wo_e2_grid_desc = transform_tensor_descriptor( const auto in_e_n_ho_wo_e2_grid_desc = transform_tensor_descriptor(
in_n_c0_y_ho_x_wo_e2_global_desc, in_n_c0_y_ho_x_wo_e2_global_desc,
make_tuple(make_merge_transform(make_tuple(C0, Y, X)), make_tuple(make_merge_transform(make_tuple(C0, Y, X)),
make_pass_through_transform(N), make_pass_through_transform(N),
...@@ -156,7 +156,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -156,7 +156,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{})); make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{}));
const auto b_e0_e1_n_ho_wo_e2_grid_desc = transform_tensor_descriptor( const auto b_e0_e1_n_ho_wo_e2_grid_desc = transform_tensor_descriptor(
b_e_n_ho_wo_e2_grid_desc, in_e_n_ho_wo_e2_grid_desc,
make_tuple(make_unmerge_transform(make_tuple(E0, E1)), make_tuple(make_unmerge_transform(make_tuple(E0, E1)),
make_pass_through_transform(N), make_pass_through_transform(N),
make_pass_through_transform(Hop), make_pass_through_transform(Hop),
......
...@@ -129,7 +129,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -129,7 +129,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
// input tensor // input tensor
const auto in_n_c0_hip_wip_e2_global_desc = transform_tensor_descriptor( const auto in_n_c0_hip_wip_e2_global_desc = transform_tensor_descriptor(
make_naive_tensor_descriptor_packed(make_tuple(K, C0, Hi, Wi, E2)), make_naive_tensor_descriptor_packed(make_tuple(N, C0, Hi, Wi, E2)),
make_tuple(make_pass_through_transform(N), make_tuple(make_pass_through_transform(N),
make_pass_through_transform(C0), make_pass_through_transform(C0),
make_pad_transform(Hi, InLeftPadH, InRightPadH), make_pad_transform(Hi, InLeftPadH, InRightPadH),
...@@ -150,7 +150,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -150,7 +150,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
make_tuple( make_tuple(
Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}, Sequence<6>{})); Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}, Sequence<6>{}));
const auto b_e_n_ho_wo_e2_grid_desc = transform_tensor_descriptor( const auto in_e_n_ho_wo_e2_grid_desc = transform_tensor_descriptor(
in_n_c0_y_ho_x_wo_e2_global_desc, in_n_c0_y_ho_x_wo_e2_global_desc,
make_tuple(make_merge_transform(make_tuple(C0, Y, X)), make_tuple(make_merge_transform(make_tuple(C0, Y, X)),
make_pass_through_transform(N), make_pass_through_transform(N),
...@@ -162,7 +162,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0 ...@@ -162,7 +162,7 @@ struct DriverDynamicConvolutionForwardImplicitGemmDlops_v5r1_nc0hwc1_kc0yxc1_nk0
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{})); make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}, Sequence<4>{}));
const auto b_e0_e1_n_ho_wo_e2_grid_desc = transform_tensor_descriptor( const auto b_e0_e1_n_ho_wo_e2_grid_desc = transform_tensor_descriptor(
b_e_n_ho_wo_e2_grid_desc, in_e_n_ho_wo_e2_grid_desc,
make_tuple(make_unmerge_transform(make_tuple(E0, E1)), make_tuple(make_unmerge_transform(make_tuple(E0, E1)),
make_pass_through_transform(N), make_pass_through_transform(N),
make_pass_through_transform(Hop), make_pass_through_transform(Hop),
......
...@@ -105,14 +105,14 @@ int main(int argc, char* argv[]) ...@@ -105,14 +105,14 @@ int main(int argc, char* argv[])
constexpr auto C1 = Number<8>{}; constexpr auto C1 = Number<8>{};
constexpr auto K0 = Number<1>{}; constexpr auto K0 = Number<1>{};
constexpr auto K1 = Number<4>{}; constexpr auto K1 = Number<4>{};
#elif 0 #elif 1
constexpr auto N = Number<1>{}; constexpr auto N = Number<1>{};
constexpr auto Hi = Number<1080>{}; constexpr auto Hi = Number<1080>{};
constexpr auto Wi = Number<1920>{}; constexpr auto Wi = Number<1920>{};
constexpr auto Y = Number<3>{}; constexpr auto Y = Number<3>{};
constexpr auto X = Number<3>{}; constexpr auto X = Number<3>{};
constexpr auto C0 = Number<2>{}; constexpr auto C0 = Number<3>{};
constexpr auto C1 = Number<8>{}; constexpr auto C1 = Number<4>{};
constexpr auto K0 = Number<2>{}; constexpr auto K0 = Number<2>{};
constexpr auto K1 = Number<8>{}; constexpr auto K1 = Number<8>{};
#elif 0 #elif 0
...@@ -135,7 +135,7 @@ int main(int argc, char* argv[]) ...@@ -135,7 +135,7 @@ int main(int argc, char* argv[])
constexpr auto C1 = Number<8>{}; constexpr auto C1 = Number<8>{};
constexpr auto K0 = Number<2>{}; constexpr auto K0 = Number<2>{};
constexpr auto K1 = Number<8>{}; constexpr auto K1 = Number<8>{};
#elif 1 #elif 0
constexpr auto N = Number<128>{}; constexpr auto N = Number<128>{};
constexpr auto Hi = Number<270>{}; constexpr auto Hi = Number<270>{};
constexpr auto Wi = Number<480>{}; constexpr auto Wi = Number<480>{};
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment