Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
277ad347
Commit
277ad347
authored
Jul 03, 2024
by
Adam Osewski
Browse files
Remove conditional clearing of c_thread_buf.
parent
4dae6d81
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
23 deletions
+11
-23
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle_v2.hpp
.../grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle_v2.hpp
+2
-6
include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
...k/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
+6
-12
include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp
...k/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp
+3
-5
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle_v2.hpp
View file @
277ad347
...
...
@@ -818,8 +818,6 @@ class GridwiseGemmMultipleD_xdl_splitk_cshuffle_v2
const
bool
has_k_block_main_loop
=
gridwise_gemm_pipeline
.
CalculateHasMainLoop
(
num_k_block_main_loop
);
bool
clear_c_thread_buf
=
true
;
auto
blockwise_gemm
=
BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector
<
BlockSize
,
ComputeType
,
...
...
@@ -850,8 +848,7 @@ class GridwiseGemmMultipleD_xdl_splitk_cshuffle_v2
b_block_slice_copy_step
,
blockwise_gemm
,
c_thread_buf
,
num_k_block_main_loop
,
clear_c_thread_buf
);
num_k_block_main_loop
);
}
else
{
...
...
@@ -869,8 +866,7 @@ class GridwiseGemmMultipleD_xdl_splitk_cshuffle_v2
b_block_slice_copy_step
,
blockwise_gemm
,
c_thread_buf
,
num_k_block_main_loop
,
clear_c_thread_buf
);
num_k_block_main_loop
);
}
}
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp
View file @
277ad347
...
...
@@ -54,8 +54,7 @@ struct GridwiseGemmPipeline_v1<1, true, true>
const
BBlockTransferStep
&
b_block_copy_step
,
const
BlockwiseGemm
&
blockwise_gemm
,
CThreadBuffer
&
c_thread_buf
,
index_t
num_loop
,
bool
clear_c_thread_buf
=
true
)
index_t
num_loop
)
{
// preload data into LDS
a_blockwise_copy
.
RunRead
(
a_grid_desc
,
a_grid_buf
);
...
...
@@ -65,7 +64,6 @@ struct GridwiseGemmPipeline_v1<1, true, true>
b_blockwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc
,
b_block_copy_step
);
// Initialize C
if
(
clear_c_thread_buf
)
c_thread_buf
.
Clear
();
a_blockwise_copy
.
RunWrite
(
a_block_desc
,
a_block_buf
);
...
...
@@ -154,8 +152,7 @@ struct GridwiseGemmPipeline_v1<2, true, true>
const
BBlockTransferStep
&
b_block_copy_step
,
const
BlockwiseGemm
&
blockwise_gemm
,
CThreadBuffer
&
c_thread_buf
,
index_t
num_loop
,
bool
clear_c_thread_buf
=
true
)
index_t
num_loop
)
{
// preload data into LDS
{
...
...
@@ -173,7 +170,6 @@ struct GridwiseGemmPipeline_v1<2, true, true>
}
// Initialize C
if
(
clear_c_thread_buf
)
c_thread_buf
.
Clear
();
// main body
...
...
@@ -699,8 +695,7 @@ struct GridwiseGemmPipelineInterwave_v1<1>
const
BBlockTransferStep
&
b_block_copy_step
,
const
BlockwiseGemm
&
blockwise_gemm
,
CThreadBuffer
&
c_thread_buf
,
index_t
num_loop
,
bool
clear_c_thread_buf
=
true
)
index_t
num_loop
)
{
// preload data into LDS
a_blockwise_copy
.
RunRead
(
a_grid_desc
,
a_grid_buf
);
...
...
@@ -710,7 +705,6 @@ struct GridwiseGemmPipelineInterwave_v1<1>
b_blockwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc
,
b_block_copy_step
);
// Initialize C
if
(
clear_c_thread_buf
)
c_thread_buf
.
Clear
();
a_blockwise_copy
.
RunWrite
(
a_block_desc
,
a_block_buf
);
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp
View file @
277ad347
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -49,8 +49,7 @@ struct GridwiseGemmPipeline_v2
const
BBlockTransferStep
&
b_block_copy_step
,
const
BlockwiseGemm
&
blockwise_gemm
,
CThreadBuffer
&
c_thread_buf
,
index_t
num_loop
,
bool
clear_c_thread_buf
=
true
)
index_t
num_loop
)
{
// global read 0
a_blockwise_copy
.
RunRead
(
a_grid_desc
,
a_grid_buf
);
...
...
@@ -61,7 +60,6 @@ struct GridwiseGemmPipeline_v2
b_blockwise_copy
.
MoveSrcSliceWindow
(
b_grid_desc
,
b_block_copy_step
);
// Initialize C
if
(
clear_c_thread_buf
)
c_thread_buf
.
Clear
();
// LDS write 0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment