Enable grouped forward convolution (WMMA) on Navi4x (gfx1200)

bae8112d · Jing Zhang · 255fbc56 · bae8112d · bae8112d · bae8112d
Commit bae8112d authored Mar 09, 2024 by Jing Zhang
6 changed files
--- a/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt
+++ b/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt
 list(APPEND gpu_list1 gfx908 gfx90a gfx940 gfx941 gfx942 gfx950)
-list(APPEND gpu_list2 gfx1100 gfx1101 gfx1102)
+list(APPEND gpu_list2 gfx1100 gfx1101 gfx1102 gfx1200)
 set(target 0)
 foreach(gpu IN LISTS GPU_TARGETS)

--- a/example/30_grouped_conv_fwd_multiple_d/common.hpp
+++ b/example/30_grouped_conv_fwd_multiple_d/common.hpp
@@ -90,10 +90,10 @@ struct ExecutionConfig final
    bool time_kernel     = true;
 };
-#define DefaultConvParam                                                       \
+#define DefaultConvParam                                                     \
-    ck::utils::conv::ConvParam                                                 \
+    ck::utils::conv::ConvParam                                               \
-    {                                                                          \
+    {                                                                        \
-        2, 32, 2, 256, 192, {3, 3}, {71, 71}, {2, 2}, {1, 1}, {1, 1}, { 1, 1 } \
+        2, 32, 2, 32, 32, {3, 3}, {14, 14}, {2, 2}, {1, 1}, {1, 1}, { 1, 1 } \
    }
 inline void print_help_msg()

--- a/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp
+++ b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp
@@ -90,10 +90,10 @@ struct ExecutionConfig final
    bool time_kernel     = true;
 };
-#define DefaultConvParam                                                       \
+#define DefaultConvParam                                                     \
-    ck::utils::conv::ConvParam                                                 \
+    ck::utils::conv::ConvParam                                               \
-    {                                                                          \
+    {                                                                        \
-        2, 32, 2, 256, 192, {3, 3}, {71, 71}, {2, 2}, {1, 1}, {1, 1}, { 1, 1 } \
+        2, 32, 2, 32, 32, {3, 3}, {14, 14}, {2, 2}, {1, 1}, {1, 1}, { 1, 1 } \
    }
 inline void print_help_msg()

--- a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp
@@ -581,7 +581,7 @@ struct DeviceGroupedConvFwdMultipleD_Wmma_CShuffle
        namespace ctc = tensor_layout::convolution;
        // check device
-        if(ck::is_navi3_supported())
+        if(ck::is_navi3_supported() || ck::is_navi4_supported())
        {
            if constexpr(!(is_same_v<AccDataType, float> || is_same_v<AccDataType, int32_t>))
            {

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp
@@ -340,7 +340,8 @@ struct GridwiseGemmMultipleD_Wmma
    static constexpr auto MWaves = MPerBlock / (MRepeat * MPerWmma);
    static constexpr auto NWaves = NPerBlock / (NRepeat * NPerWmma);
-    static constexpr auto WmmaK  = K1 == 16 ? 32 : 16;
+    // static constexpr auto WmmaK  = K1 == 16 ? 32 : 16;
+    static constexpr auto WmmaK = 16;
    using ThisThreadBlock = ThisThreadBlock<BlockSize>;

--- a/library/include/ck/library/utility/check_err.hpp
+++ b/library/include/ck/library/utility/check_err.hpp
@@ -156,7 +156,7 @@ check_err(const Range& out,
        {
            max_err = err > max_err ? err : max_err;
            err_count++;
-            // if(err_count < 5)
+            if(err_count < 5)
            {
                std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i
                          << "] != ref[" << i << "]: " << o << " != " << r << std::endl;