Fix compilation errors generated by forthcoming Clang changes

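Without this change, the following diagnostic is generated: "a template argument list is expected after a name prefixed by the template keyword" [-Wmissing-template-arg-list-after-template-kw]. See the C++17 spec, [temp.names] p5. The fix adds an empty template argument list after the name, i.e. `xdlops_gemm.template Run(...)` becomes `xdlops_gemm.template Run<>(...)`.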

Fix compilation errors generated by forthcoming Clang changes
Without this change, the following diagnostic is generated: "a template argument list is expected after a name prefixed by the template keyword" [-Wmissing-template-arg-list-after-template-kw]. See the C++17 spec, [temp.names] p5.
d79f7731 · Matthew Curtis · 294cb823 · d79f7731 · d79f7731 · d79f7731
Commit d79f7731 authored Oct 02, 2024 by Matthew Curtis
3 changed files
--- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v1_ab_scale.hpp
+++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v1_ab_scale.hpp
@@ -308,7 +308,7 @@ struct BlockwiseGemmXdlops_pipeline_v1_ab_scale<BlockGemmPipelineScheduler::Intr
                                typename vector_type<ComputeDataType,
                                                     xdlops_gemm.K1PerXdlops>::type;
-                            xdlops_gemm.template Run(
+                            xdlops_gemm.template Run<>(
                                a_thread_vec.template AsType<mfma_input_type>(),
                                b_thread_vec.template AsType<mfma_input_type>(),
                                c_thread_buf_per_scale.GetVectorTypeReference(I0));
@@ -390,9 +390,10 @@ struct BlockwiseGemmXdlops_pipeline_v1_ab_scale<BlockGemmPipelineScheduler::Intr
                        using mfma_input_type =
                            typename vector_type<ComputeDataType, xdlops_gemm.K1PerXdlops>::type;
-                        xdlops_gemm.template Run(a_thread_vec.template AsType<mfma_input_type>(),
+                        xdlops_gemm.template Run<>(
-                                                 b_thread_vec.template AsType<mfma_input_type>(),
+                            a_thread_vec.template AsType<mfma_input_type>(),
-                                                 c_thread_buf_per_scale.GetVectorTypeReference(I0));
+                            b_thread_vec.template AsType<mfma_input_type>(),
+                            c_thread_buf_per_scale.GetVectorTypeReference(I0));
                    });
                    static_for<0, xdlops_gemm.GetRegSizePerXdlops(), 1>{}([&](auto t) {
                        constexpr index_t c_offset =

--- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v2_ab_scale.hpp
+++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v2_ab_scale.hpp
@@ -350,7 +350,7 @@ struct BlockwiseGemmXdlops_pipeline_v2_ab_scale<BlockGemmPipelineScheduler::Intr
                                    typename vector_type<ComputeDataType,
                                                         xdlops_gemm.K1PerXdlops>::type;
-                                xdlops_gemm.template Run(
+                                xdlops_gemm.template Run<>(
                                    a_thread_vec.template AsType<mfma_input_type>(),
                                    b_thread_vec.template AsType<mfma_input_type>(),
                                    c_thread_buf_per_scale.GetVectorTypeReference(I0));
@@ -443,7 +443,7 @@ struct BlockwiseGemmXdlops_pipeline_v2_ab_scale<BlockGemmPipelineScheduler::Intr
                                typename vector_type<ComputeDataType,
                                                     xdlops_gemm.K1PerXdlops>::type;
-                            xdlops_gemm.template Run(
+                            xdlops_gemm.template Run<>(
                                a_thread_vec.template AsType<mfma_input_type>(),
                                b_thread_vec.template AsType<mfma_input_type>(),
                                c_thread_buf_per_scale.GetVectorTypeReference(I0));
@@ -518,9 +518,10 @@ struct BlockwiseGemmXdlops_pipeline_v2_ab_scale<BlockGemmPipelineScheduler::Intr
                        using mfma_input_type =
                            typename vector_type<ComputeDataType, xdlops_gemm.K1PerXdlops>::type;
-                        xdlops_gemm.template Run(a_thread_vec.template AsType<mfma_input_type>(),
+                        xdlops_gemm.template Run<>(
-                                                 b_thread_vec.template AsType<mfma_input_type>(),
+                            a_thread_vec.template AsType<mfma_input_type>(),
-                                                 c_thread_buf_per_scale.GetVectorTypeReference(I0));
+                            b_thread_vec.template AsType<mfma_input_type>(),
+                            c_thread_buf_per_scale.GetVectorTypeReference(I0));
                    });
                    static_for<0, xdlops_gemm.GetRegSizePerXdlops(), 1>{}([&](auto t) {
                        constexpr index_t c_offset =
@@ -575,9 +576,10 @@ struct BlockwiseGemmXdlops_pipeline_v2_ab_scale<BlockGemmPipelineScheduler::Intr
                        using mfma_input_type =
                            typename vector_type<ComputeDataType, xdlops_gemm.K1PerXdlops>::type;
-                        xdlops_gemm.template Run(a_thread_vec.template AsType<mfma_input_type>(),
+                        xdlops_gemm.template Run<>(
-                                                 b_thread_vec.template AsType<mfma_input_type>(),
+                            a_thread_vec.template AsType<mfma_input_type>(),
-                                                 c_thread_buf_per_scale.GetVectorTypeReference(I0));
+                            b_thread_vec.template AsType<mfma_input_type>(),
+                            c_thread_buf_per_scale.GetVectorTypeReference(I0));
                    });
                    static_for<0, xdlops_gemm.GetRegSizePerXdlops(), 1>{}([&](auto t) {
                        constexpr index_t c_offset =

--- a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_ab_scale.hpp
+++ b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops_v3_ab_scale.hpp
@@ -427,7 +427,7 @@ struct BlockwiseGemmXdlops_pipeline_v3_ab_scale<BlockGemmPipelineScheduler::Intr
                                typename vector_type<ComputeDataType,
                                                     xdlops_gemm.K1PerXdlops>::type;
-                            xdlops_gemm.template Run(
+                            xdlops_gemm.template Run<>(
                                a_thread_vec.template AsType<mfma_input_type>(),
                                b_thread_vec.template AsType<mfma_input_type>(),
                                c_thread_buf_per_scale.GetVectorTypeReference(I0));
@@ -504,9 +504,10 @@ struct BlockwiseGemmXdlops_pipeline_v3_ab_scale<BlockGemmPipelineScheduler::Intr
                        using mfma_input_type =
                            typename vector_type<ComputeDataType, xdlops_gemm.K1PerXdlops>::type;
-                        xdlops_gemm.template Run(a_thread_vec.template AsType<mfma_input_type>(),
+                        xdlops_gemm.template Run<>(
-                                                 b_thread_vec.template AsType<mfma_input_type>(),
+                            a_thread_vec.template AsType<mfma_input_type>(),
-                                                 c_thread_buf_per_scale.GetVectorTypeReference(I0));
+                            b_thread_vec.template AsType<mfma_input_type>(),
+                            c_thread_buf_per_scale.GetVectorTypeReference(I0));
                    });
                    static_for<0, xdlops_gemm.GetRegSizePerXdlops(), 1>{}([&](auto t) {
                        constexpr index_t c_offset =