Merge branch 'develop' into gemm_v2r3_kpad_fix

a8629a98 · zjing14 · GitHub · 8dc713ea · 94bfa502 · a8629a98
Unverified Commit a8629a98 authored Sep 26, 2023 by zjing14 Committed by GitHub Sep 26, 2023
14 changed files
--- a/test/elementwise_normalization/CMakeLists.txt
+++ b/test/elementwise_normalization/CMakeLists.txt
-if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
-  add_custom_target(test_elementwise_normalization)
-  add_gtest_executable(test_elementwise_layernorm_fp16 test_elementwise_layernorm_fp16.cpp)
+add_custom_target(test_elementwise_normalization)
+add_gtest_executable(test_elementwise_layernorm_fp16 test_elementwise_layernorm_fp16.cpp)
+if(result EQUAL 0)
  target_link_libraries(test_elementwise_layernorm_fp16 PRIVATE utility device_elementwise_normalization_instance)
  add_dependencies(test_elementwise_normalization test_elementwise_layernorm_fp16)
 endif()
\ No newline at end of file
--- a/test/gemm/CMakeLists.txt
+++ b/test/gemm/CMakeLists.txt
-if(DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
 add_test_executable(test_gemm_fp32 gemm_fp32.cpp)
-target_link_libraries(test_gemm_fp32 PRIVATE utility)
-target_link_libraries(test_gemm_fp32 PRIVATE device_gemm_instance)
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_fp32 PRIVATE utility device_gemm_instance)
 endif()
-if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
 add_test_executable(test_gemm_fp16 gemm_fp16.cpp)
-target_link_libraries(test_gemm_fp16 PRIVATE utility)
-target_link_libraries(test_gemm_fp16 PRIVATE device_gemm_instance)
-add_library(gemm_standalone_xdl_fp16_instances STATIC
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_fp16 PRIVATE utility device_gemm_instance)
+    add_library(gemm_standalone_xdl_fp16_instances STATIC
    instance/gemm_f16_nn_instance.cpp
    instance/gemm_f16_nt_instance.cpp
    instance/gemm_f16_tn_instance.cpp
    instance/gemm_wavelet_f16_tn_instance.cpp
    instance/gemm_f16_tt_instance.cpp
-)
+    )
+endif()
 add_test_executable(test_gemm_standalone_xdl_fp16 gemm_standalone_xdl_fp16.cpp)
-target_link_libraries(test_gemm_standalone_xdl_fp16 PRIVATE gemm_standalone_xdl_fp16_instances utility)
-target_include_directories(test_gemm_standalone_xdl_fp16 PRIVATE instance/)
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_standalone_xdl_fp16 PRIVATE gemm_standalone_xdl_fp16_instances utility)
+    target_include_directories(test_gemm_standalone_xdl_fp16 PRIVATE instance/)
 endif()
-if(DTYPES MATCHES "bf16" OR NOT DEFINED DTYPES)
 add_test_executable(test_gemm_bf16 gemm_bf16.cpp)
-target_link_libraries(test_gemm_bf16 PRIVATE utility)
-target_link_libraries(test_gemm_bf16 PRIVATE device_gemm_instance)
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_bf16 PRIVATE utility device_gemm_instance)
 endif()
-if(DTYPES MATCHES "int8" OR NOT DEFINED DTYPES)
 add_test_executable(test_gemm_int8 gemm_int8.cpp)
-target_link_libraries(test_gemm_int8 PRIVATE utility)
-target_link_libraries(test_gemm_int8 PRIVATE device_gemm_instance)
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_int8 PRIVATE utility device_gemm_instance)
 endif()
\ No newline at end of file
--- a/test/gemm_layernorm/CMakeLists.txt
+++ b/test/gemm_layernorm/CMakeLists.txt
@@ -2,12 +2,12 @@ list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
 set(target 0)
 foreach(gpu IN LISTS GPU_TARGETS)
 if(gpu IN_LIST gpu_list AND target EQUAL 0)
-  if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
   add_custom_target(test_gemm_layernorm)
   add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp)
-   target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
-   add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
-   set(target 1)
-  endif()
+   if(result EQUAL 0)
+     target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
+     add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
+     set(target 1)
+   endif()
 endif()
 endforeach()
--- a/test/gemm_reduce/CMakeLists.txt
+++ b/test/gemm_reduce/CMakeLists.txt
-if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
-  add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
-  target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility)
-  target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
+add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
+if(result EQUAL 0)
+  target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility device_gemm_reduce_instance)
 endif()
\ No newline at end of file
--- a/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp
+++ b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp
@@ -14,6 +14,8 @@

 #include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"

+using namespace ck::tensor_layout::convolution;
+
 template <typename Tuple>
 class TestGroupedConvndBwdWeight : public ::testing::Test
 {
@@ -27,28 +29,59 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
    using NDimSpatial = std::tuple_element_t<6, Tuple>;

    std::vector<ck::utils::conv::ConvParam> conv_params;
-    ck::index_t split_k{2};
+    std::vector<ck::index_t> split_ks{1, 2};
+
+    bool skip_case(const ck::utils::conv::ConvParam& params, const ck::index_t split_k)
+    { // returns true when this (params, split_k) combination must not be run
+        // fp16 input only: odd K or C is supported only by the DL kernel, and the DL
+        // kernel currently supports only split_k == 1, so skip such cases otherwise.
+        if constexpr(std::is_same_v<InDataType, ck::half_t>)
+        {
+            if(split_k != 1 && (params.K_ % 2 != 0 || params.C_ % 2 != 0))
+            {
+                return true;
+            }
+        }
+
+        // 1d NWGC input with NWGK output is supported only by the DL kernel, which is
+        // limited to split_k == 1, so every other split_k is skipped for this layout pair.
+        if constexpr(std::is_same_v<InLayout, NWGC> && std::is_same_v<OutLayout, NWGK>)
+        {
+            if(split_k != 1)
+            {
+                return true;
+            }
+        }
+
+        return false;
+    }

    void Run()
    {
        EXPECT_FALSE(conv_params.empty());
        bool pass = true;

-        for(auto& param : conv_params)
+        for(auto split_k : split_ks)
        {
-            pass = pass && ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial{},
-                                                                              InLayout,
-                                                                              WeiLayout,
-                                                                              OutLayout,
-                                                                              InDataType,
-                                                                              WeiDataType,
-                                                                              OutDataType>(
-                               true,  // do_verification
-                               1,     // init_method: integer value
-                               false, // do_log
-                               false, // time_kernel
-                               param,
-                               split_k);
+            for(auto& param : conv_params)
+            {
+                if(!skip_case(param, split_k))
+                {
+                    pass = pass && ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial{},
+                                                                                      InLayout,
+                                                                                      WeiLayout,
+                                                                                      OutLayout,
+                                                                                      InDataType,
+                                                                                      WeiDataType,
+                                                                                      OutDataType>(
+                                       true,  // do_verification
+                                       1,     // init_method: integer value
+                                       false, // do_log
+                                       false, // time_kernel
+                                       param,
+                                       split_k);
+                }
+            }
        }
        EXPECT_TRUE(pass);
    }
@@ -69,12 +102,13 @@ class TestGroupedConvndBwdWeight3d : public TestGroupedConvndBwdWeight<Tuple>
 {
 };

-using namespace ck::tensor_layout::convolution;
-
 using KernelTypes1d = ::testing::Types<
    std::tuple<float, float, float, GNWC, GKXC, GNWK, ck::Number<1>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, GNWC, GKXC, GNWK, ck::Number<1>>,
-    std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNWC, GKXC, GNWK, ck::Number<1>>>;
+    std::tuple<ck::bhalf_t, float, ck::bhalf_t, GNWC, GKXC, GNWK, ck::Number<1>>,
+    std::tuple<float, float, float, NWGC, GKXC, NWGK, ck::Number<1>>,
+    std::tuple<ck::half_t, ck::half_t, ck::half_t, NWGC, GKXC, NWGK, ck::Number<1>>,
+    std::tuple<ck::bhalf_t, float, ck::bhalf_t, NWGC, GKXC, NWGK, ck::Number<1>>>;
 using KernelTypes2d = ::testing::Types<
    std::tuple<float, float, float, GNHWC, GKYXC, GNHWK, ck::Number<2>>,
    std::tuple<ck::half_t, ck::half_t, ck::half_t, GNHWC, GKYXC, GNHWK, ck::Number<2>>,

--- a/test/grouped_convnd_fwd/CMakeLists.txt
+++ b/test/grouped_convnd_fwd/CMakeLists.txt
-add_gtest_executable(test_grouped_convnd_fwd grouped_convnd_fwd.cpp)
+add_gtest_executable(test_grouped_convnd_fwd test_grouped_convnd_fwd.cpp)
 target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)

--- a/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
+++ b/test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
-
-#include <cstdlib>
-#include <iostream>
-#include <initializer_list>
-#include <vector>
-#include <gtest/gtest.h>
-
-#include "profiler/profile_grouped_conv_fwd_impl.hpp"
-
-class TestGroupedConvNdFwd : public ::testing::Test
-{
-    protected:
-    std::vector<ck::utils::conv::ConvParam> conv_params;
-};
-
-// 1d GNWC/GKXC/GNWK
-TEST_F(TestGroupedConvNdFwd, GroupedConv1dFwdGNWC)
-{
-    conv_params.clear();
-    conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
-    conv_params.push_back({1, 2, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
-    conv_params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
-    conv_params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}});
-    conv_params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}});
-
-    for(auto& param : conv_params)
-    {
-        bool pass;
-
-        // fp32
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
-                                                           ck::tensor_layout::convolution::GNWC,
-                                                           ck::tensor_layout::convolution::GKXC,
-                                                           ck::tensor_layout::convolution::GNWK,
-                                                           float,
-                                                           float,
-                                                           float>(true,  // do_verification
-                                                                  1,     // init_method
-                                                                  false, // do_log
-                                                                  false, // time_kernel
-                                                                  param);
-
-        EXPECT_TRUE(pass);
-
-        // fp16
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
-                                                           ck::tensor_layout::convolution::GNWC,
-                                                           ck::tensor_layout::convolution::GKXC,
-                                                           ck::tensor_layout::convolution::GNWK,
-                                                           ck::half_t,
-                                                           ck::half_t,
-                                                           ck::half_t>(true,  // do_verification
-                                                                       1,     // init_method
-                                                                       false, // do_log
-                                                                       false, // time_kernel
-                                                                       param);
-
-        EXPECT_TRUE(pass);
-
-        // bf16
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
-                                                           ck::tensor_layout::convolution::GNWC,
-                                                           ck::tensor_layout::convolution::GKXC,
-                                                           ck::tensor_layout::convolution::GNWK,
-                                                           ck::bhalf_t,
-                                                           ck::bhalf_t,
-                                                           ck::bhalf_t>(true,  // do_verification
-                                                                        1,     // init_method
-                                                                        false, // do_log
-                                                                        false, // time_kernel
-                                                                        param);
-
-        EXPECT_TRUE(pass);
-
-        // int8
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
-                                                           ck::tensor_layout::convolution::GNWC,
-                                                           ck::tensor_layout::convolution::GKXC,
-                                                           ck::tensor_layout::convolution::GNWK,
-                                                           int8_t,
-                                                           int8_t,
-                                                           int8_t>(true,  // do_verification
-                                                                   1,     // init_method
-                                                                   false, // do_log
-                                                                   false, // time_kernel
-                                                                   param);
-
-        EXPECT_TRUE(pass);
-    }
-}
-
-// 2d GNHWC/GKYXC/GNHWK
-TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdGNHWC)
-{
-    conv_params.clear();
-    conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
-    conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
-    conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
-    conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
-    conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
-    conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
-
-    for(auto& param : conv_params)
-    {
-        bool pass;
-
-        // fp32
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
-                                                           ck::tensor_layout::convolution::GNHWC,
-                                                           ck::tensor_layout::convolution::GKYXC,
-                                                           ck::tensor_layout::convolution::GNHWK,
-                                                           float,
-                                                           float,
-                                                           float>(true,  // do_verification
-                                                                  1,     // init_method
-                                                                  false, // do_log
-                                                                  false, // time_kernel
-                                                                  param);
-
-        EXPECT_TRUE(pass);
-
-        // fp16
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
-                                                           ck::tensor_layout::convolution::GNHWC,
-                                                           ck::tensor_layout::convolution::GKYXC,
-                                                           ck::tensor_layout::convolution::GNHWK,
-                                                           ck::half_t,
-                                                           ck::half_t,
-                                                           ck::half_t>(true,  // do_verification
-                                                                       1,     // init_method
-                                                                       false, // do_log
-                                                                       false, // time_kernel
-                                                                       param);
-
-        EXPECT_TRUE(pass);
-
-        // bf16
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
-                                                           ck::tensor_layout::convolution::GNHWC,
-                                                           ck::tensor_layout::convolution::GKYXC,
-                                                           ck::tensor_layout::convolution::GNHWK,
-                                                           ck::bhalf_t,
-                                                           ck::bhalf_t,
-                                                           ck::bhalf_t>(true,  // do_verification
-                                                                        1,     // init_method
-                                                                        false, // do_log
-                                                                        false, // time_kernel
-                                                                        param);
-
-        EXPECT_TRUE(pass);
-
-        // int8
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
-                                                           ck::tensor_layout::convolution::GNHWC,
-                                                           ck::tensor_layout::convolution::GKYXC,
-                                                           ck::tensor_layout::convolution::GNHWK,
-                                                           int8_t,
-                                                           int8_t,
-                                                           int8_t>(true,  // do_verification
-                                                                   1,     // init_method
-                                                                   false, // do_log
-                                                                   false, // time_kernel
-                                                                   param);
-
-        EXPECT_TRUE(pass);
-    }
-}
-
-// 3d GNDHWC/GKZYXC/GNDHWK
-TEST_F(TestGroupedConvNdFwd, GroupedConv3dFwdGNDHWC)
-{
-    conv_params.clear();
-    conv_params.push_back(
-        {3, 2, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
-    conv_params.push_back(
-        {3, 2, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
-    conv_params.push_back(
-        {3, 2, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
-    conv_params.push_back(
-        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
-    this->conv_params.push_back(
-        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
-    conv_params.push_back(
-        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
-
-    for(auto& param : conv_params)
-    {
-        bool pass;
-
-        // fp32
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
-                                                           ck::tensor_layout::convolution::GNDHWC,
-                                                           ck::tensor_layout::convolution::GKZYXC,
-                                                           ck::tensor_layout::convolution::GNDHWK,
-                                                           float,
-                                                           float,
-                                                           float>(true,  // do_verification
-                                                                  1,     // init_method
-                                                                  false, // do_log
-                                                                  false, // time_kernel
-                                                                  param);
-
-        EXPECT_TRUE(pass);
-
-        // fp16
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
-                                                           ck::tensor_layout::convolution::GNDHWC,
-                                                           ck::tensor_layout::convolution::GKZYXC,
-                                                           ck::tensor_layout::convolution::GNDHWK,
-                                                           ck::half_t,
-                                                           ck::half_t,
-                                                           ck::half_t>(true,  // do_verification
-                                                                       1,     // init_method
-                                                                       false, // do_log
-                                                                       false, // time_kernel
-                                                                       param);
-
-        EXPECT_TRUE(pass);
-
-        // bf16
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
-                                                           ck::tensor_layout::convolution::GNDHWC,
-                                                           ck::tensor_layout::convolution::GKZYXC,
-                                                           ck::tensor_layout::convolution::GNDHWK,
-                                                           ck::bhalf_t,
-                                                           ck::bhalf_t,
-                                                           ck::bhalf_t>(true,  // do_verification
-                                                                        1,     // init_method
-                                                                        false, // do_log
-                                                                        false, // time_kernel
-                                                                        param);
-
-        EXPECT_TRUE(pass);
-
-        // int8
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
-                                                           ck::tensor_layout::convolution::GNDHWC,
-                                                           ck::tensor_layout::convolution::GKZYXC,
-                                                           ck::tensor_layout::convolution::GNDHWK,
-                                                           int8_t,
-                                                           int8_t,
-                                                           int8_t>(true,  // do_verification
-                                                                   1,     // init_method
-                                                                   false, // do_log
-                                                                   false, // time_kernel
-                                                                   param);
-
-        EXPECT_TRUE(pass);
-    }
-}
-
-// 2d NHWGC/KYXGC/NHWGK
-TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
-{
-    conv_params.clear();
-    conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
-    conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
-    conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
-    conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
-    conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
-    conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
-
-    for(auto& param : conv_params)
-    {
-        bool pass;
-
-        // fp16
-        pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
-                                                           ck::tensor_layout::convolution::NHWGC,
-                                                           ck::tensor_layout::convolution::GKYXC,
-                                                           ck::tensor_layout::convolution::NHWGK,
-                                                           ck::half_t,
-                                                           ck::half_t,
-                                                           ck::half_t>(true,  // do_verification
-                                                                       1,     // init_method
-                                                                       false, // do_log
-                                                                       false, // time_kernel
-                                                                       param);
-
-        EXPECT_TRUE(pass);
-    }
-}
--- a/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
+++ b/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+
+#include <cstdlib>
+#include <iostream>
+#include <initializer_list>
+#include <vector>
+#include <gtest/gtest.h>
+
+#include "profiler/profile_grouped_conv_fwd_impl.hpp"
+
+template <typename Tuple> // Tuple = (DataType, InLayout, WeiLayout, OutLayout)
+class TestGroupedConvndFwd : public ::testing::Test
+{
+    protected:
+    using DataType  = std::tuple_element_t<0, Tuple>; // used for input, weight and output
+    using InLayout  = std::tuple_element_t<1, Tuple>;
+    using WeiLayout = std::tuple_element_t<2, Tuple>;
+    using OutLayout = std::tuple_element_t<3, Tuple>;
+
+    std::vector<ck::utils::conv::ConvParam> conv_params; // cases iterated by Run()
+
+    template <ck::index_t NDimSpatial> // forwarded as the first template argument below
+    void Run()
+    {
+        EXPECT_FALSE(conv_params.empty()); // guards against a vacuous pass on an empty list
+        bool pass = true;
+        for(auto& param : conv_params) // once pass is false, && skips the remaining calls
+        {
+            pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
+                                                                       InLayout,
+                                                                       WeiLayout,
+                                                                       OutLayout,
+                                                                       DataType,
+                                                                       DataType,
+                                                                       DataType>(
+                               true,  // do_verification
+                               1,     // init_method: integer value
+                               false, // do_log
+                               false, // time_kernel
+                               param);
+        }
+        EXPECT_TRUE(pass);
+    }
+};
+
+using namespace ck::tensor_layout::convolution;
+
+using KernelTypes1d = ::testing::Types<std::tuple<float, GNWC, GKXC, GNWK>,
+                                       std::tuple<ck::half_t, GNWC, GKXC, GNWK>,
+                                       std::tuple<ck::bhalf_t, GNWC, GKXC, GNWK>,
+                                       std::tuple<int8_t, GNWC, GKXC, GNWK>>;
+
+using KernelTypes2d = ::testing::Types<std::tuple<float, GNHWC, GKYXC, GNHWK>,
+                                       std::tuple<ck::half_t, GNHWC, GKYXC, GNHWK>,
+                                       std::tuple<ck::bhalf_t, GNHWC, GKYXC, GNHWK>,
+                                       std::tuple<int8_t, GNHWC, GKYXC, GNHWK>,
+                                       std::tuple<float, NHWGC, GKYXC, NHWGK>,
+                                       std::tuple<ck::half_t, NHWGC, GKYXC, NHWGK>,
+                                       std::tuple<ck::bhalf_t, NHWGC, GKYXC, NHWGK>,
+                                       std::tuple<int8_t, NHWGC, GKYXC, NHWGK>>;
+
+using KernelTypes3d = ::testing::Types<std::tuple<float, GNDHWC, GKZYXC, GNDHWK>,
+                                       std::tuple<ck::half_t, GNDHWC, GKZYXC, GNDHWK>,
+                                       std::tuple<ck::bhalf_t, GNDHWC, GKZYXC, GNDHWK>,
+                                       std::tuple<int8_t, GNDHWC, GKZYXC, GNDHWK>,
+                                       std::tuple<float, NDHWGC, GKZYXC, NDHWGK>,
+                                       std::tuple<ck::half_t, NDHWGC, GKZYXC, NDHWGK>,
+                                       std::tuple<ck::bhalf_t, NDHWGC, GKZYXC, NDHWGK>,
+                                       std::tuple<int8_t, NDHWGC, GKZYXC, NDHWGK>>;
+
+template <typename Tuple>
+class TestGroupedConvndFwd1d : public TestGroupedConvndFwd<Tuple>
+{
+};
+
+template <typename Tuple>
+class TestGroupedConvndFwd2d : public TestGroupedConvndFwd<Tuple>
+{
+};
+
+template <typename Tuple>
+class TestGroupedConvndFwd3d : public TestGroupedConvndFwd<Tuple>
+{
+};
+
+TYPED_TEST_SUITE(TestGroupedConvndFwd1d, KernelTypes1d);
+TYPED_TEST_SUITE(TestGroupedConvndFwd2d, KernelTypes2d);
+TYPED_TEST_SUITE(TestGroupedConvndFwd3d, KernelTypes3d);
+
+TYPED_TEST(TestGroupedConvndFwd1d, Test1D) // instantiated for each tuple in KernelTypes1d
+{ // NOTE(review): entries presumably follow ConvParam's constructor order - confirm there
+    this->conv_params.clear(); // leading field of every entry equals the Run<> rank (1)
+    this->conv_params.push_back({1, 2, 32, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
+    this->conv_params.push_back({1, 2, 32, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
+    this->conv_params.push_back({1, 2, 32, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
+    this->conv_params.push_back({1, 1, 1, 1, 32, {3}, {32}, {1}, {1}, {1}, {1}});
+    this->conv_params.push_back({1, 1, 1, 64, 3, {3}, {32}, {1}, {1}, {1}, {1}});
+    this->template Run<1>(); // "template" needed: Run is a member template of a dependent base
+}
+
+TYPED_TEST(TestGroupedConvndFwd2d, Test2D) // instantiated for each tuple in KernelTypes2d
+{ // NOTE(review): entries presumably follow ConvParam's constructor order - confirm there
+    this->conv_params.clear(); // leading field of every entry equals the Run<> rank (2)
+    this->conv_params.push_back(
+        {2, 2, 32, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
+    this->conv_params.push_back(
+        {2, 2, 32, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    this->conv_params.push_back(
+        {2, 2, 32, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
+    this->conv_params.push_back({2, 1, 1, 1, 32, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    this->conv_params.push_back({2, 1, 1, 64, 3, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    this->conv_params.push_back({2, 1, 1, 1, 1, {3, 3}, {32, 32}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    this->template Run<2>(); // "template" needed: Run is a member template of a dependent base
+}
+
+TYPED_TEST(TestGroupedConvndFwd3d, Test3D) // instantiated for each tuple in KernelTypes3d
+{ // NOTE(review): entries presumably follow ConvParam's constructor order - confirm there
+    this->conv_params.clear(); // leading field of every entry equals the Run<> rank (3)
+    this->conv_params.push_back(
+        {3, 2, 32, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
+    this->conv_params.push_back(
+        {3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->conv_params.push_back(
+        {3, 2, 32, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
+    this->conv_params.push_back(
+        {3, 1, 1, 1, 32, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->conv_params.push_back(
+        {3, 1, 1, 64, 3, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->conv_params.push_back(
+        {3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    this->template Run<3>(); // "template" needed: Run is a member template of a dependent base
+}
--- a/test/grouped_gemm/CMakeLists.txt
+++ b/test/grouped_gemm/CMakeLists.txt
-if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
 list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
 set(target 0)
 foreach(gpu IN LISTS GPU_TARGETS)
@@ -13,4 +12,3 @@ foreach(gpu IN LISTS GPU_TARGETS)
   set(target 1)
 endif()
 endforeach()
-endif()
--- a/test/image_to_column/CMakeLists.txt
+++ b/test/image_to_column/CMakeLists.txt
+# Functional test: linked against the utility library and device_image_to_column_instance.
+add_gtest_executable(test_image_to_column test_image_to_column.cpp)
+target_link_libraries(test_image_to_column PRIVATE utility device_image_to_column_instance)
+# Interface test: linked against the utility library only.
+add_gtest_executable(test_image_to_column_interface test_image_to_column_interface.cpp)
+target_link_libraries(test_image_to_column_interface PRIVATE utility)
--- a/test/image_to_column/test_image_to_column.cpp
+++ b/test/image_to_column/test_image_to_column.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+
+#include <cstdlib>
+#include <iostream>
+#include <initializer_list>
+#include <tuple>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "profiler/profile_image_to_column_impl.hpp"
+
+// Typed fixture for the image-to-column tests. Tuple supplies, in order, the input element
+// type, the output element type and the input tensor layout.
+template <typename Tuple>
+class TestImageToColumn : public ::testing::Test
+{
+    protected:
+    using InDataType  = std::tuple_element_t<0, Tuple>;
+    using OutDataType = std::tuple_element_t<1, Tuple>;
+    using InLayout    = std::tuple_element_t<2, Tuple>;
+
+    // Problem descriptions consumed by Run(); each test fills this list before calling it.
+    std::vector<ck::utils::conv::ConvParam> conv_params;
+
+    // Passes the entries of conv_params, in order, to profile_image_to_column_impl with
+    // verification enabled and logging/timing disabled. The profiler is no longer invoked once
+    // one entry has failed; the overall outcome is reported through a single expectation.
+    template <ck::index_t NDimSpatial>
+    void Run()
+    {
+        EXPECT_FALSE(conv_params.empty());
+
+        const bool do_verification = true;
+        const int init_method      = 1; // integer value
+        const bool do_log          = false;
+        const bool time_kernel     = false;
+
+        bool pass = true;
+        for(auto& conv_param : conv_params)
+        {
+            const bool ok = ck::profiler::
+                profile_image_to_column_impl<NDimSpatial, InLayout, InDataType, OutDataType>(
+                    do_verification, init_method, do_log, time_kernel, conv_param);
+            if(!ok)
+            {
+                pass = false;
+                break;
+            }
+        }
+        EXPECT_TRUE(pass);
+    }
+};
+
+using namespace ck::tensor_layout::convolution;
+
+// Element-type combinations shared by every dimensionality; only the input layout differs.
+template <typename Layout>
+using ImageToColumnKernelTypes = ::testing::Types<std::tuple<float, float, Layout>,
+                                                  std::tuple<ck::bhalf_t, ck::bhalf_t, Layout>,
+                                                  std::tuple<ck::half_t, ck::half_t, Layout>,
+                                                  std::tuple<int8_t, int8_t, Layout>>;
+
+using KernelTypes1d = ImageToColumnKernelTypes<GNWC>;
+using KernelTypes2d = ImageToColumnKernelTypes<GNHWC>;
+using KernelTypes3d = ImageToColumnKernelTypes<GNDHWC>;
+
+// The three fixtures below add nothing to TestImageToColumn; they exist as distinct class
+// names so that each can be registered with its own type list further down.
+template <typename Tuple>
+class TestImageToColumn1d : public TestImageToColumn<Tuple>
+{
+};
+
+template <typename Tuple>
+class TestImageToColumn2d : public TestImageToColumn<Tuple>
+{
+};
+
+template <typename Tuple>
+class TestImageToColumn3d : public TestImageToColumn<Tuple>
+{
+};
+
+// Bind each fixture to the type list of the matching dimensionality.
+TYPED_TEST_SUITE(TestImageToColumn1d, KernelTypes1d);
+TYPED_TEST_SUITE(TestImageToColumn2d, KernelTypes2d);
+TYPED_TEST_SUITE(TestImageToColumn3d, KernelTypes3d);
+
+// 1D cases: four baseline shapes followed by dedicated ScalarPerVector, stride and dilation
+// cases, all handed to Run<1>().
+TYPED_TEST(TestImageToColumn1d, Test1D)
+{
+    auto& cases = this->conv_params;
+    cases.clear();
+
+    cases.push_back({1, 1, 4, 1, 192, {3}, {28}, {1}, {1}, {1}, {1}});
+    cases.push_back({1, 1, 64, 1, 64, {3}, {14}, {1}, {1}, {1}, {1}});
+    cases.push_back({1, 1, 64, 1, 64, {1}, {7}, {2}, {1}, {0}, {0}});
+    cases.push_back({1, 1, 64, 1, 64, {1}, {3}, {1}, {1}, {0}, {0}});
+    // Case for which ScalarPerVector should be 1.
+    cases.push_back({1, 1, 4, 1, 1, {3}, {28}, {1}, {1}, {1}, {1}});
+    // Stride other than 1.
+    cases.push_back({1, 1, 1, 1, 4, {3}, {28}, {2}, {1}, {1}, {1}});
+    // Dilation other than 1.
+    cases.push_back({1, 1, 1, 1, 4, {3}, {28}, {1}, {2}, {1}, {1}});
+    this->template Run<1>();
+}
+
+// 2D cases: four configurations handed to Run<2>().
+TYPED_TEST(TestImageToColumn2d, Test2D)
+{
+    auto& cases = this->conv_params;
+    cases.clear();
+
+    cases.push_back({2, 1, 4, 1, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    cases.push_back({2, 1, 64, 1, 64, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
+    cases.push_back({2, 1, 64, 1, 64, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
+    cases.push_back({2, 1, 64, 1, 64, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
+    this->template Run<2>();
+}
+
+// 3D cases: three configurations handed to Run<3>().
+TYPED_TEST(TestImageToColumn3d, Test3D)
+{
+    auto& cases = this->conv_params;
+    cases.clear();
+    cases.push_back(
+        {3, 1, 16, 1, 64, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
+    cases.push_back(
+        {3, 1, 2, 1, 64, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
+    cases.push_back(
+        {3, 1, 32, 1, 64, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
+    this->template Run<3>();
+}
--- a/test/image_to_column/test_image_to_column_interface.cpp
+++ b/test/image_to_column/test_image_to_column_interface.cpp
+// SPDX-License-Identifier: MIT
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+
+#include <cstdlib>
+#include <iostream>
+#include <initializer_list>
+#include <tuple>
+#include <vector>
+
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
+#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
+
+#include "ck/library/utility/convolution_parameter.hpp"
+#include "ck/library/utility/algorithm.hpp"
+#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
+
+#include <gtest/gtest.h>
+
+// Element type used for both the input and the output of the instance under test.
+using DataType = float;
+// Input layout of the instance under test.
+using InLayout = ck::tensor_layout::convolution::GNWC;
+
+// Shorthand for ck::Sequence; used to spell the thread cluster lengths of the instance.
+template <ck::index_t... Is>
+using S = ck::Sequence<Is...>;
+
+// Fixture that instantiates a 1D DeviceImageToColumnImpl with the given ScalarPerVector and
+// asks it whether the problem stored in conv_param is supported. No buffers are involved: both
+// data pointers handed to MakeArgument are null.
+// When IsCPacked is false, conv_param.C_ / 2 is passed as the channel count while the input
+// strides are still computed from the unmodified conv_param.
+template <ck::index_t ScalarPerVector, bool IsCPacked>
+class TestImageToColumnInterface : public ::testing::Test
+{
+    protected:
+    static constexpr ck::index_t NDimSpatial = 1;
+
+    // clang-format off
+    using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
+        //#####################|        Num| InLayout| InDataType| OutDataType| Block|  MPer|  KPer|    Thread|         Scalar|
+        //#####################|        Dim|         |           |            |  Size| Block| Block|   Cluster|            Per|
+        //#####################|    Spatial|         |           |            |      |      |      |   Lengths|         Vector|
+        //#####################|           |         |           |            |      |      |      |          |               |
+                              < NDimSpatial, InLayout,   DataType,    DataType,   256,   128,   128, S<16, 16>,ScalarPerVector>;
+    // clang-format on
+
+    // Problem description; each test assigns it before calling Run().
+    ck::utils::conv::ConvParam conv_param;
+
+    // Builds the argument for conv_param and returns the result of IsSupportedArgument.
+    bool Run()
+    {
+        // Channel count handed to the device op: C itself, or C / 2 to simulate the
+        // behavior that C is not packed.
+        const auto channels = IsCPacked ? conv_param.C_ : conv_param.C_ / 2;
+
+        // Lengths of the two-dimensional output descriptor.
+        const ck::index_t out_rows =
+            conv_param.N_ *
+            ck::accumulate_n<ck::index_t>(
+                conv_param.output_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
+        const ck::index_t out_cols =
+            conv_param.C_ *
+            ck::accumulate_n<ck::index_t>(
+                conv_param.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
+
+        const auto in_desc =
+            ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
+                conv_param);
+        const HostTensorDescriptor out_desc({out_rows, out_cols});
+
+        // Copies a whole source range into the front of a fixed-size destination array.
+        auto fill_from = [](auto& dst, const auto& src) {
+            std::copy(src.begin(), src.end(), dst.begin());
+        };
+
+        std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
+        fill_from(input_spatial_lengths, conv_param.input_spatial_lengths_);
+
+        std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
+        fill_from(filter_spatial_lengths, conv_param.filter_spatial_lengths_);
+
+        std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
+        fill_from(output_spatial_lengths, conv_param.output_spatial_lengths_);
+
+        std::array<ck::index_t, NDimSpatial + 3> input_g_n_c_wis_strides{};
+        fill_from(input_g_n_c_wis_strides, in_desc.GetStrides());
+
+        std::array<ck::index_t, 2> output_m_k_strides{};
+        fill_from(output_m_k_strides, out_desc.GetStrides());
+
+        std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
+        fill_from(conv_filter_strides, conv_param.conv_filter_strides_);
+
+        std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
+        fill_from(conv_filter_dilations, conv_param.conv_filter_dilations_);
+
+        std::array<ck::index_t, NDimSpatial> input_left_pads{};
+        fill_from(input_left_pads, conv_param.input_left_pads_);
+
+        std::array<ck::index_t, NDimSpatial> input_right_pads{};
+        fill_from(input_right_pads, conv_param.input_right_pads_);
+
+        DeviceImgToColInstance img2col{};
+        auto argument = img2col.MakeArgument(nullptr,
+                                             nullptr,
+                                             conv_param.N_,
+                                             channels,
+                                             input_spatial_lengths,
+                                             filter_spatial_lengths,
+                                             output_spatial_lengths,
+                                             input_g_n_c_wis_strides,
+                                             output_m_k_strides,
+                                             conv_filter_strides,
+                                             conv_filter_dilations,
+                                             input_left_pads,
+                                             input_right_pads);
+
+        return img2col.IsSupportedArgument(argument);
+    }
+};
+
+// ScalarPerVector = 1; the channel count of conv_param is passed through unchanged.
+class TestImageToColumnInterface1ScalarPerVector : public TestImageToColumnInterface<1, true>
+{
+};
+
+// ScalarPerVector = 4; the channel count of conv_param is passed through unchanged.
+class TestImageToColumnInterface4ScalarPerVector : public TestImageToColumnInterface<4, true>
+{
+};
+
+// ScalarPerVector = 4; IsCPacked is false, so the fixture passes half of conv_param's channel
+// count to the device op.
+class TestImageToColumnInterface4ScalarPerVectorFakeC : public TestImageToColumnInterface<4, false>
+{
+};
+
+// With ScalarPerVector = 1 every configuration below is expected to be reported as supported.
+// NOTE(review): the positional ConvParam fields are presumably {NDimSpatial, G, N, K, C, filter
+// lengths, input lengths, strides, dilations, left pads, right pads} -- confirm against the
+// ConvParam constructor.
+TEST_F(TestImageToColumnInterface1ScalarPerVector, X1ScalarPerVector)
+{
+    // vector load C * X % ScalarPerVector
+    this->conv_param  = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}};
+    bool is_supported = this->Run();
+    EXPECT_TRUE(is_supported);
+    // vector load C * left_pad_x % ScalarPerVector
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}};
+    is_supported     = this->Run();
+    EXPECT_TRUE(is_supported);
+    // vector load C * right_pad_x % ScalarPerVector
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}};
+    is_supported     = this->Run();
+    EXPECT_TRUE(is_supported);
+
+    // vector load C % ScalarPerVector, right_pad and stride
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}};
+    is_supported     = this->Run();
+    EXPECT_TRUE(is_supported);
+    // vector load C % ScalarPerVector, left_pad and stride
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}};
+    is_supported     = this->Run();
+    EXPECT_TRUE(is_supported);
+    // vector load C % ScalarPerVector, dilation
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}};
+    is_supported     = this->Run();
+    EXPECT_TRUE(is_supported);
+
+    // C = 4
+    this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}};
+    is_supported     = this->Run();
+    EXPECT_TRUE(is_supported);
+}
+
+// Same configurations as the ScalarPerVector = 1 test, now with ScalarPerVector = 4: the six
+// cases whose fifth field is 1 are expected to be rejected, and only the final case (fifth
+// field 4) is expected to be supported.
+// NOTE(review): the fifth positional ConvParam field is presumably C -- confirm against the
+// ConvParam constructor.
+TEST_F(TestImageToColumnInterface4ScalarPerVector, X4ScalarPerVector)
+{
+    // vector load C * X % ScalarPerVector
+    this->conv_param  = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}};
+    bool is_supported = this->Run();
+    EXPECT_FALSE(is_supported);
+    // vector load C * left_pad_x % ScalarPerVector
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}};
+    is_supported     = this->Run();
+    EXPECT_FALSE(is_supported);
+    // vector load C * right_pad_x % ScalarPerVector
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}};
+    is_supported     = this->Run();
+    EXPECT_FALSE(is_supported);
+
+    // vector load C % ScalarPerVector, right_pad and stride
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}};
+    is_supported     = this->Run();
+    EXPECT_FALSE(is_supported);
+    // vector load C % ScalarPerVector, left_pad and stride
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}};
+    is_supported     = this->Run();
+    EXPECT_FALSE(is_supported);
+    // vector load C % ScalarPerVector, dilation
+    this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}};
+    is_supported     = this->Run();
+    EXPECT_FALSE(is_supported);
+
+    // C = 4
+    this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}};
+    is_supported     = this->Run();
+    EXPECT_TRUE(is_supported);
+}
+
+// Non-packed-C cases: this fixture has IsCPacked = false, so Run() passes conv_param.C_ / 2 to
+// MakeArgument while the input strides are still derived from the full conv_param.
+// NOTE(review): the fifth positional ConvParam field is presumably C -- confirm against the
+// ConvParam constructor.
+TEST_F(TestImageToColumnInterface4ScalarPerVectorFakeC, X4ScalarPerVectorFakeC)
+{
+    // conv_param C = 3; the device op receives C / 2. Expected to be rejected.
+    this->conv_param  = {1, 1, 1, 1, 3, {4}, {3}, {1}, {1}, {0}, {0}};
+    bool is_supported = this->Run();
+    EXPECT_FALSE(is_supported);
+    // conv_param C = 8; the device op receives C / 2 = 4. Expected to be supported.
+    this->conv_param = {1, 1, 1, 1, 8, {4}, {3}, {1}, {1}, {0}, {0}};
+    is_supported     = this->Run();
+    EXPECT_TRUE(is_supported);
+}
--- a/test/normalization/CMakeLists.txt
+++ b/test/normalization/CMakeLists.txt
-if(DTYPES MATCHES "fp16" OR DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
-  add_custom_target(test_normalization)
-endif()
-if(DTYPES MATCHES "fp32" OR NOT DEFINED DTYPES)
-  add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
-  add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
+# Umbrella target for the normalization tests. Each test below is linked and attached to it
+# only when `result` equals 0.
+# NOTE(review): `result` is presumably set by add_gtest_executable -- confirm in the macro.
+add_custom_target(test_normalization)
+add_gtest_executable(test_layernorm2d_fp32 test_layernorm2d_fp32.cpp)
+if(result EQUAL 0)
  target_link_libraries(test_layernorm2d_fp32 PRIVATE utility device_normalization_instance)
-  target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
  add_dependencies(test_normalization test_layernorm2d_fp32)
+endif()
+add_gtest_executable(test_groupnorm_fp32 test_groupnorm_fp32.cpp)
+if(result EQUAL 0)
+  target_link_libraries(test_groupnorm_fp32 PRIVATE utility device_normalization_instance)
  add_dependencies(test_normalization test_groupnorm_fp32)
 endif()
-if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
-  add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
-  add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
+add_gtest_executable(test_layernorm2d_fp16 test_layernorm2d_fp16.cpp)
+if(result EQUAL 0)
  target_link_libraries(test_layernorm2d_fp16 PRIVATE utility device_normalization_instance)
-  target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
  add_dependencies(test_normalization test_layernorm2d_fp16)
+endif()
+add_gtest_executable(test_groupnorm_fp16 test_groupnorm_fp16.cpp)
+if(result EQUAL 0)
+  target_link_libraries(test_groupnorm_fp16 PRIVATE utility device_normalization_instance)
  add_dependencies(test_normalization test_groupnorm_fp16)
 endif()
--- a/test/reduce/CMakeLists.txt
+++ b/test/reduce/CMakeLists.txt
 add_test_executable(test_reduce_no_index reduce_no_index.cpp)
 add_test_executable(test_reduce_with_index reduce_with_index.cpp)
-target_link_libraries(test_reduce_no_index PRIVATE utility)
-target_link_libraries(test_reduce_no_index PRIVATE device_reduce_instance)
-target_link_libraries(test_reduce_with_index PRIVATE utility)
-target_link_libraries(test_reduce_with_index PRIVATE device_reduce_instance)
+# Both reduce tests link the utility library and device_reduce_instance in a single call each.
+target_link_libraries(test_reduce_no_index PRIVATE utility device_reduce_instance)
+target_link_libraries(test_reduce_with_index PRIVATE utility device_reduce_instance)