Remove the conditional clearing of c_thread_buf (the buffer is now always cleared).

277ad347 · Adam Osewski · 4dae6d81 · 277ad347 · 277ad347 · 277ad347
Commit 277ad347 authored Jul 03, 2024 by Adam Osewski
3 changed files
--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle_v2.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle_v2.hpp
@@ -818,8 +818,6 @@ class GridwiseGemmMultipleD_xdl_splitk_cshuffle_v2
        const bool has_k_block_main_loop =
            gridwise_gemm_pipeline.CalculateHasMainLoop(num_k_block_main_loop);

-        bool clear_c_thread_buf = true;
-
        auto blockwise_gemm = BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector<
            BlockSize,
            ComputeType,
@@ -850,8 +848,7 @@ class GridwiseGemmMultipleD_xdl_splitk_cshuffle_v2
                                                      b_block_slice_copy_step,
                                                      blockwise_gemm,
                                                      c_thread_buf,
-                                                      num_k_block_main_loop,
-                                                      clear_c_thread_buf);
+                                                      num_k_block_main_loop);
        }
        else
        {
@@ -869,8 +866,7 @@ class GridwiseGemmMultipleD_xdl_splitk_cshuffle_v2
                                                       b_block_slice_copy_step,
                                                       blockwise_gemm,
                                                       c_thread_buf,
-                                                       num_k_block_main_loop,
-                                                       clear_c_thread_buf);
+                                                       num_k_block_main_loop);
        }
    }


--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
@@ -54,8 +54,7 @@ struct GridwiseGemmPipeline_v1<1, true, true>
                               const BBlockTransferStep& b_block_copy_step,
                               const BlockwiseGemm& blockwise_gemm,
                               CThreadBuffer& c_thread_buf,
-                               index_t num_loop,
-                               bool clear_c_thread_buf = true)
+                               index_t num_loop)
    {
        // preload data into LDS
        a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
@@ -65,7 +64,6 @@ struct GridwiseGemmPipeline_v1<1, true, true>
        b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);

        // Initialize C
-        if(clear_c_thread_buf)
        c_thread_buf.Clear();

        a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);
@@ -154,8 +152,7 @@ struct GridwiseGemmPipeline_v1<2, true, true>
                               const BBlockTransferStep& b_block_copy_step,
                               const BlockwiseGemm& blockwise_gemm,
                               CThreadBuffer& c_thread_buf,
-                               index_t num_loop,
-                               bool clear_c_thread_buf = true)
+                               index_t num_loop)
    {
        // preload data into LDS
        {
@@ -173,7 +170,6 @@ struct GridwiseGemmPipeline_v1<2, true, true>
        }

        // Initialize C
-        if(clear_c_thread_buf)
        c_thread_buf.Clear();

        // main body
@@ -699,8 +695,7 @@ struct GridwiseGemmPipelineInterwave_v1<1>
                               const BBlockTransferStep& b_block_copy_step,
                               const BlockwiseGemm& blockwise_gemm,
                               CThreadBuffer& c_thread_buf,
-                               index_t num_loop,
-                               bool clear_c_thread_buf = true)
+                               index_t num_loop)
    {
        // preload data into LDS
        a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
@@ -710,7 +705,6 @@ struct GridwiseGemmPipelineInterwave_v1<1>
        b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);

        // Initialize C
-        if(clear_c_thread_buf)
        c_thread_buf.Clear();

        a_blockwise_copy.RunWrite(a_block_desc, a_block_buf);

--- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp
+++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.

 #pragma once

@@ -49,8 +49,7 @@ struct GridwiseGemmPipeline_v2
                               const BBlockTransferStep& b_block_copy_step,
                               const BlockwiseGemm& blockwise_gemm,
                               CThreadBuffer& c_thread_buf,
-                               index_t num_loop,
-                               bool clear_c_thread_buf = true)
+                               index_t num_loop)
    {
        // global read 0
        a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
@@ -61,7 +60,6 @@ struct GridwiseGemmPipeline_v2
        b_blockwise_copy.MoveSrcSliceWindow(b_grid_desc, b_block_copy_step);

        // Initialize C
-        if(clear_c_thread_buf)
        c_thread_buf.Clear();

        // LDS write 0