fixed

6433eede · Jing Zhang · afeccb5f
Commit 6433eede authored Dec 12, 2024 by Jing Zhang
3 changed files
--- a/example/01_gemm/gemm_xdl_fp16_pk_i4_v3.cpp
+++ b/example/01_gemm/gemm_xdl_fp16_pk_i4_v3.cpp
@@ -21,8 +21,8 @@ using CElementOp = PassThrough;
 static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
-static constexpr bool PermuteA = false;
+static constexpr bool PermuteA         = false;
-static constexpr bool PermuteB = true;
+static constexpr bool PermuteB         = true;
 static constexpr ck::index_t KPerBlock = 128;
 // clang-format off

--- a/include/ck/utility/static_buffer.hpp
+++ b/include/ck/utility/static_buffer.hpp
@@ -114,7 +114,10 @@ struct StaticBufferTupleOfVector
    // Get X
    // i is offset of S, not X. i should be aligned to X
-    template <typename X, index_t I>
+    template <typename X,
+              index_t I,
+              typename enable_if<has_same_scalar_type<S, X>::value || !is_native_type<S>(),
+                                 bool>::type = false>
    __host__ __device__ constexpr auto GetAsType(Number<I> i) const
    {
        constexpr auto s_per_x = Number<scalar_type<remove_cvref_t<X>>::vector_size>{};
@@ -130,7 +133,10 @@ struct StaticBufferTupleOfVector
    // Set X
    // i is offset of S, not X. i should be aligned to X
-    template <typename X, index_t I>
+    template <typename X,
+              index_t I,
+              typename enable_if<has_same_scalar_type<S, X>::value || !is_native_type<S>(),
+                                 bool>::type = false>
    __host__ __device__ constexpr void SetAsType(Number<I> i, X x)
    {
        constexpr auto s_per_x = Number<scalar_type<remove_cvref_t<X>>::vector_size>{};

--- a/profiler/src/CMakeLists.txt
+++ b/profiler/src/CMakeLists.txt
@@ -43,7 +43,6 @@ if(SUPPORTED_GPU_TARGETS MATCHES "gfx9")
    list(APPEND PROFILER_SOURCES profile_gemm_add_silu.cpp)
    list(APPEND PROFILER_SOURCES profile_gemm_add_relu_add_layernorm.cpp)
    list(APPEND PROFILER_SOURCES profile_grouped_gemm_fixed_nk.cpp)
-    list(APPEND PROFILER_SOURCES profile_grouped_gemm_two_stage.cpp)
    list(APPEND PROFILER_SOURCES profile_grouped_gemm_fastgelu.cpp)
    list(APPEND PROFILER_SOURCES profile_grouped_gemm_tile_loop.cpp)
    list(APPEND PROFILER_SOURCES profile_grouped_gemm_multiply_tile_loop.cpp)