Commit b93575ca authored by Jing Zhang

merge develop

parents 54df59bf c8a8385f
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
@@ -12,3 +13,4 @@ foreach(gpu IN LISTS GPU_TARGETS)
set(target 1)
endif()
endforeach()
+endif()
@@ -108,7 +108,7 @@ TEST_F(TestGGemmSplitKInterface_MKNKMN, KLoops)
// kloops % 2
Ks = std::vector<int>{256, 512, 320, 768};
-EXPECT_FALSE(
+EXPECT_TRUE(
    DefaultGGemmInstance{}.IsSupported(Ms, Ns, Ks, StrideAs, StrideBs, StrideCs, kbatch));
// Not all gemms have same value for main_k0_block_loop!
...
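For reference, a minimal sketch (not CK's implementation) of the constraint the comment above alludes to: under split-K, each gemm's K is first divided by kbatch, and the interface rejects a group whose gemms disagree on whether the main K0 block loop runs more than once. K0PerBlock and K1 below are hypothetical tile sizes for illustration only.

#include <cstddef>
#include <vector>

constexpr int K0PerBlock = 4; // hypothetical tile size
constexpr int K1         = 8; // hypothetical tile size

// Returns true only if every gemm in the group lands on the same
// main_k0_block_loop decision for the given kbatch.
bool gemms_agree_on_main_k0_loop(const std::vector<int>& Ks, int kbatch)
{
    bool first = false;
    for(std::size_t i = 0; i < Ks.size(); ++i)
    {
        const int k_per_batch    = Ks[i] / kbatch;
        const bool has_main_loop = k_per_batch / (K0PerBlock * K1) > 1;
        if(i == 0)
            first = has_main_loop;
        else if(has_main_loop != first)
            return false; // mixed values -> group rejected
    }
    return true;
}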
@@ -147,14 +147,14 @@ struct DeviceGroupedGemmSplitkInstanceWrapper
32,
4,
2,
-S<1, 4, 32, 1>,
+S<1, 4, 16, 1>,
ABlockTransferThreadClusterArrageOrder,
ABlockTransferSrcAccessOrder,
ABlockTransferSrcVectorDim::value,
ABlockTransferSrcScalarPerVector,
ABlockTransferDstScalarPerVector_K1::value,
ABlockLdsAddExtraM::value,
-S<1, 4, 32, 1>,
+S<1, 4, 16, 1>,
BBlockTransferThreadClusterArrageOrder,
BBlockTransferSrcAccessOrder,
BBlockTransferSrcVectorDim::value,
...
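Both S<...> changes shrink the thread clusters of the A and B block transfers from 128 threads (1*4*32*1) to 64 (1*4*16*1). A hedged compile-time check of the usual invariant that the cluster lengths must tile the whole thread block; BlockSize = 64 is an assumption here, not a value read from this instance.

#include <cstddef>

// Product of the thread cluster lengths, e.g. cluster_product<1, 4, 16, 1> == 64.
template <std::size_t... Ls>
inline constexpr std::size_t cluster_product = (std::size_t{1} * ... * Ls);

constexpr std::size_t BlockSize = 64; // assumed block size for this wrapper

static_assert(cluster_product<1, 4, 16, 1> == BlockSize,
              "cluster lengths must cover the whole thread block");
static_assert(cluster_product<1, 4, 32, 1> == 2 * BlockSize,
              "the old cluster tiled twice as many threads");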
-add_custom_target(test_normalization)
-add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
-add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
-add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
-add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
-target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
-target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
-add_dependencies(test_normalization test_layernorm2d_fp32)
-add_dependencies(test_normalization test_layernorm2d_fp16)
-add_dependencies(test_normalization test_groupnorm_fp16)
-add_dependencies(test_normalization test_groupnorm_fp32)
+if(DTYPES MATCHES "fp16" OR DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
+add_custom_target(test_normalization)
+endif()
+if(DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
+add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
+add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
+target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
+target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
+add_dependencies(test_normalization test_layernorm2d_fp32)
+add_dependencies(test_normalization test_groupnorm_fp32)
+endif()
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
+add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
+add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
+target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
+target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
+add_dependencies(test_normalization test_layernorm2d_fp16)
+add_dependencies(test_normalization test_groupnorm_fp16)
+endif()
add_custom_target(test_pool_fwd)
-add_gtest_executable(test_avg_pool2d_fwd test_avg_pool2d_fwd.cpp)
add_gtest_executable(test_avg_pool3d_fwd test_avg_pool3d_fwd.cpp)
-add_gtest_executable(test_max_pool2d_fwd test_max_pool2d_fwd.cpp)
add_gtest_executable(test_max_pool3d_fwd test_max_pool3d_fwd.cpp)
-target_link_libraries(test_avg_pool2d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_avg_pool3d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_max_pool2d_fwd PRIVATE utility device_pool_fwd_instance)
-target_link_libraries(test_max_pool3d_fwd PRIVATE utility device_pool_fwd_instance)
+target_link_libraries(test_avg_pool3d_fwd PRIVATE utility device_pool3d_fwd_instance)
+target_link_libraries(test_max_pool3d_fwd PRIVATE utility device_pool3d_fwd_instance)
-add_dependencies(test_pool_fwd test_avg_pool2d_fwd)
add_dependencies(test_pool_fwd test_avg_pool3d_fwd)
-add_dependencies(test_pool_fwd test_max_pool2d_fwd)
add_dependencies(test_pool_fwd test_max_pool3d_fwd)
deleted file: test_avg_pool2d_fwd.cpp (dropped from CMakeLists above)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_pool2d_fwd_impl.hpp"
#include "test_pool_fwd_common.hpp"
template <typename Tuple>
class TestAvgPool2dFwd : public ::testing::Test
{
protected:
using InDataType = std::tuple_element_t<0, Tuple>;
using OutDataType = std::tuple_element_t<1, Tuple>;
using ComputeDataType = std::tuple_element_t<2, Tuple>;
using IndexDataType = std::tuple_element_t<3, Tuple>;
std::vector<PoolingParam> params;
void Run()
{
for(auto param : params)
{
bool success =
ck::profiler::profile_pool2d_fwd_impl<InDataType,
OutDataType,
ComputeDataType,
IndexDataType,
ck::ReduceTensorOp::AVG,
false,
false>(true,
2,
false,
false,
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
}
}
};
using KernelTypes =
::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
TYPED_TEST_SUITE(TestAvgPool2dFwd, KernelTypes);
TYPED_TEST(TestAvgPool2dFwd, Test_Pool)
{
// length, window_length, window_stride, left_pad, right_pad
this->params = {{{1, 1, 1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
{{2, 16, 64, 64}, {64, 64}, {1, 1}, {0, 0}, {0, 0}},
{{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}}};
this->Run();
}
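The expected output extents for the cases above follow from standard pooling arithmetic; a small standalone check (not part of the deleted test) for the padded 30x30 case and the window-covers-input 64x64 case:

#include <iostream>

// out = (in + left_pad + right_pad - window) / stride + 1
int pooled_length(int in, int window, int stride, int lpad, int rpad)
{
    return (in + lpad + rpad - window) / stride + 1;
}

int main()
{
    std::cout << pooled_length(30, 2, 2, 1, 1) << '\n';  // 16
    std::cout << pooled_length(64, 64, 1, 0, 0) << '\n'; // 1 (window covers the input)
}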
@@ -25,6 +25,8 @@ class TestAvgPool3dFwd : public ::testing::Test
OutDataType,
ComputeDataType,
IndexDataType,
+ck::tensor_layout::convolution::NDHWC,
+ck::tensor_layout::convolution::NDHWC,
ck::ReduceTensorOp::AVG,
false,
false>(true,
@@ -34,23 +36,27 @@ class TestAvgPool3dFwd : public ::testing::Test
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
+param.window_dilations_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
}
}
};
+#ifdef CK_ENABLE_FP16
using KernelTypes =
    ::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
+#else
+using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
+#endif
TYPED_TEST_SUITE(TestAvgPool3dFwd, KernelTypes);
TYPED_TEST(TestAvgPool3dFwd, Test_Pool)
{
-    // length, window_length, window_stride, left_pad, right_pad
-    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}}};
+    // length, window_length, window_stride, window_dilation, left_pad, right_pad
+    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {4, 4, 4}, {4, 4, 4}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
this->Run();
}
deleted file: test_max_pool2d_fwd.cpp (dropped from CMakeLists above)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "gtest/gtest.h"
#include "profiler/profile_pool2d_fwd_impl.hpp"
#include "test_pool_fwd_common.hpp"
template <typename Tuple>
class TestMaxPool2dFwd : public ::testing::Test
{
protected:
using InDataType = std::tuple_element_t<0, Tuple>;
using OutDataType = std::tuple_element_t<1, Tuple>;
using ComputeDataType = std::tuple_element_t<2, Tuple>;
using IndexDataType = std::tuple_element_t<3, Tuple>;
std::vector<PoolingParam> params;
void Run()
{
for(auto param : params)
{
// max pool
bool success =
ck::profiler::profile_pool2d_fwd_impl<InDataType,
OutDataType,
ComputeDataType,
IndexDataType,
ck::ReduceTensorOp::MAX,
false,
false>(true,
2,
false,
false,
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
// max pool + index
success = ck::profiler::profile_pool2d_fwd_impl<InDataType,
OutDataType,
ComputeDataType,
IndexDataType,
ck::ReduceTensorOp::MAX,
false,
true>(true,
2,
false,
false,
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
}
}
};
using KernelTypes =
::testing::Types<std::tuple<F16, F16, F16, I32>, std::tuple<F32, F32, F32, I32>>;
TYPED_TEST_SUITE(TestMaxPool2dFwd, KernelTypes);
TYPED_TEST(TestMaxPool2dFwd, Test_Pool)
{
// length, window_length, window_stride, left_pad, right_pad
this->params = {{{1, 1, 1, 1}, {1, 1}, {1, 1}, {0, 0}, {0, 0}},
{{2, 16, 64, 64}, {64, 64}, {1, 1}, {0, 0}, {0, 0}},
{{2, 32, 30, 30}, {2, 2}, {2, 2}, {1, 1}, {1, 1}}};
this->Run();
}
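The second profiler call above flips the last template flag so the kernel also emits argmax indices. A scalar sketch (illustrative, not the profiler's verification) of that value/index contract:

#include <iostream>
#include <utility>
#include <vector>

// Max pooling over one window, returning both the max and its flat index.
std::pair<float, int> max_with_index(const std::vector<float>& window)
{
    int best = 0;
    for(int i = 1; i < static_cast<int>(window.size()); ++i)
        if(window[i] > window[best])
            best = i;
    return {window[best], best};
}

int main()
{
    auto [v, i] = max_with_index({0.5f, 2.0f, -1.0f, 2.0f});
    std::cout << v << " at index " << i << '\n'; // 2 at index 1 (first max kept)
}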
@@ -26,6 +26,8 @@ class TestMaxPool3dFwd : public ::testing::Test
OutDataType,
ComputeDataType,
IndexDataType,
+ck::tensor_layout::convolution::NDHWC,
+ck::tensor_layout::convolution::NDHWC,
ck::ReduceTensorOp::MAX,
false,
false>(true,
@@ -35,6 +37,7 @@ class TestMaxPool3dFwd : public ::testing::Test
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
+param.window_dilations_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
@@ -44,6 +47,8 @@ class TestMaxPool3dFwd : public ::testing::Test
OutDataType,
ComputeDataType,
IndexDataType,
+ck::tensor_layout::convolution::NDHWC,
+ck::tensor_layout::convolution::NDHWC,
ck::ReduceTensorOp::MAX,
false,
true>(true,
@@ -53,6 +58,7 @@ class TestMaxPool3dFwd : public ::testing::Test
param.length_,
param.window_spatial_lengths_,
param.window_strides_,
+param.window_dilations_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(success);
@@ -60,16 +66,21 @@ class TestMaxPool3dFwd : public ::testing::Test
}
};
+#ifdef CK_ENABLE_FP16
using KernelTypes =
-    ::testing::Types<std::tuple<F16, F16, F16, I32>, std::tuple<F32, F32, F32, I32>>;
+    ::testing::Types<std::tuple<F16, F16, F32, I32>, std::tuple<F32, F32, F32, I32>>;
+#else
+using KernelTypes = ::testing::Types<std::tuple<F32, F32, F32, I32>>;
+#endif
TYPED_TEST_SUITE(TestMaxPool3dFwd, KernelTypes);
TYPED_TEST(TestMaxPool3dFwd, Test_Pool)
{
-    // length, window_length, window_stride, left_pad, right_pad
-    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
-                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}}};
+    // length, window_length, window_stride, window_dilation, left_pad, right_pad
+    this->params = {{{1, 1, 1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {64, 64, 64}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 16, 64, 64, 64}, {4, 4, 4}, {4, 4, 4}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}},
+                    {{2, 32, 30, 30, 30}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}};
this->Run();
}
@@ -14,11 +14,13 @@ struct PoolingParam
PoolingParam(const std::vector<index_t>& length,
             const std::vector<index_t>& window_spatial_lengths,
             const std::vector<index_t>& window_strides,
+            const std::vector<index_t>& window_dilations,
             const std::vector<index_t>& input_left_pads,
             const std::vector<index_t>& input_right_pads)
    : length_(length),
      window_spatial_lengths_(window_spatial_lengths),
      window_strides_(window_strides),
+      window_dilations_(window_dilations),
      input_left_pads_(input_left_pads),
      input_right_pads_(input_right_pads)
{
@@ -26,6 +28,7 @@ struct PoolingParam
std::vector<index_t> length_;
std::vector<index_t> window_spatial_lengths_;
std::vector<index_t> window_strides_;
+std::vector<index_t> window_dilations_;
std::vector<index_t> input_left_pads_;
std::vector<index_t> input_right_pads_;
};
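To make the new window_dilations_ field concrete: dilation stretches the pooling window without adding taps, so the effective extent is (window - 1) * dilation + 1. A small worked check matching the dilated 3-D case added above (window 4, stride 4, dilation 2 on length-64 axes, no padding):

#include <iostream>

int dilated_extent(int window, int dilation)
{
    return (window - 1) * dilation + 1;
}

// out = (in + pads - effective_extent) / stride + 1
int pooled_length(int in, int window, int stride, int dilation, int lpad, int rpad)
{
    return (in + lpad + rpad - dilated_extent(window, dilation)) / stride + 1;
}

int main()
{
    std::cout << dilated_extent(4, 2) << '\n';             // 7
    std::cout << pooled_length(64, 4, 4, 2, 0, 0) << '\n'; // 15
}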
@@ -10,8 +10,9 @@
template <ck::index_t N>
using I = ck::Number<N>;
+#ifdef CK_ENABLE_FP16
using F16 = ck::half_t;
+#endif
using F32 = float;
template <typename Tuple>
@@ -22,7 +23,9 @@ class TestSoftmax : public ck::TestSoftmax<Tuple>
// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank
+#ifdef CK_ENABLE_FP16
    std::tuple< F16, F32, F16, I<3>>,
+#endif
    std::tuple< F32, F32, F32, I<3>>
    >;
// clang-format on
...
@@ -10,8 +10,9 @@
template <ck::index_t N>
using I = ck::Number<N>;
+#ifdef CK_ENABLE_FP16
using F16 = ck::half_t;
+#endif
using F32 = float;
template <typename Tuple>
@@ -22,7 +23,9 @@ class TestSoftmax : public ck::TestSoftmax<Tuple>
// clang-format off
using KernelTypes = ::testing::Types<
    // InDataType, AccDataType, OutDataType, Rank
+#ifdef CK_ENABLE_FP16
    std::tuple< F16, F32, F16, I<4>>,
+#endif
    std::tuple< F32, F32, F32, I<4>>
    >;
// clang-format on
...