for SWDEV-278306

b8adad54 · Chao Liu · 8bf95038 · b8adad54 · b8adad54 · b8adad54
Commit b8adad54 authored Mar 22, 2021 by Chao Liu
3 changed files
--- a/composable_kernel/include/utility/config.amd.hpp.in
+++ b/composable_kernel/include/utility/config.amd.hpp.in
@@ -11,9 +11,9 @@
 #define CK_DEVICE_BACKEND_AMD 1
 // GPU ID
-#define CK_AMD_GPU_GFX906 0
+#define CK_AMD_GPU_GFX906 1
 #define CK_AMD_GPU_GFX908 0
-#define CK_AMD_GPU_GFX1030 1
+#define CK_AMD_GPU_GFX1030 0
 // HIP version
 #ifndef CK_HIP_VERSION_FLAT
@@ -105,9 +105,9 @@
 #endif
 // pass tensor descriptor by value, pointer or void*
-#define CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_VALUE 1
+#define CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_VALUE 0
 #define CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_POINTER 0
-#define CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_VOID_POINTER 0
+#define CK_EXPERIMENTAL_PASS_TENSOR_DESCRIPTOR_BY_VOID_POINTER 1
 // hack: have underlying assumption that need to be satsified, otherwise it's a bug
 // hack for forcing register to keep idx_diff_low_const in SGPR. idx_diff_low_const must be

--- a/driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
+++ b/driver/include/device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.hpp
@@ -40,7 +40,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(
    wei_k_c_y_x_device_buf.ToDevice(wei_k_c_y_x.mData.data());
    out_n_k_ho_wo_device_buf.ToDevice(out_n_k_ho_wo.mData.data());
-#if 0
+#if 1
    // run-time variables
    const auto in_n_c_hi_wi_desc =
        make_dynamic_naive_tensor_descriptor_packed_v2(to_multi_index(InDesc::GetLengths()));
@@ -167,7 +167,7 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw(
    constexpr index_t GemmBBlockTransferDstScalarPerVector_GemmN = 1;
    constexpr index_t GemmCThreadTransferDstScalarPerVector_GemmN1 = 4;
-#elif 1
+#elif 0
    // cdata = 64, BlockSize 64, 16x256x4
    constexpr index_t BlockSize = 64;

--- a/driver/src/conv_driver.cpp
+++ b/driver/src/conv_driver.cpp
@@ -48,8 +48,8 @@ int main(int argc, char* argv[])
    using ConvStrides   = Sequence<1, 1>;
    using ConvDilations = Sequence<1, 1>;
-    using LeftPads                   = Sequence<0, 0>;
+    using LeftPads  = Sequence<0, 0>;
-    using RightPads                  = Sequence<0, 0>;
+    using RightPads = Sequence<0, 0>;
 #elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 16;
@@ -78,7 +78,7 @@ int main(int argc, char* argv[])
    using LeftPads  = Sequence<1, 1>;
    using RightPads = Sequence<1, 1>;
-#elif 1
+#elif 0
    constexpr index_t N  = 1;
    constexpr index_t C  = 1;
    constexpr index_t HI = 1024;
@@ -150,7 +150,7 @@ int main(int argc, char* argv[])
    using LeftPads  = Sequence<0, 0>;
    using RightPads = Sequence<0, 0>;
-#elif 0
+#elif 1
    // 3x3, 71x71
    constexpr index_t N  = 128;
    constexpr index_t C  = 192;
@@ -630,7 +630,7 @@ int main(int argc, char* argv[])
    print_array("ConvStrides", to_multi_index(ConvStrides{}));
    print_array("ConvDilations", to_multi_index(ConvDilations{}));
-#if 0
+#if 1
    using in_data_t                  = float;
    constexpr index_t in_vector_size = 1;
    using acc_data_t                 = float;
@@ -719,7 +719,7 @@ int main(int argc, char* argv[])
                                                                 LeftPads{},
                                                                 RightPads{},
                                                                 nrepeat);
-#elif 0
+#elif 1
    device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw<in_data_t,
                                                                         in_vector_size,
                                                                         acc_data_t,