Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
22329520
Commit
22329520
authored
Jul 05, 2024
by
Harisankar Sadasivan
Browse files
added default value support for grid_size and streamk-polic selection set to -1
parent
e553bcf1
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
30 deletions
+17
-30
example/01_gemm/run_gemm_example_streamk_v2.inc
example/01_gemm/run_gemm_example_streamk_v2.inc
+16
-5
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
+0
-24
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_streamk_v3.hpp
...ration/gpu/grid/gridwise_gemm_xdl_cshuffle_streamk_v3.hpp
+1
-1
No files found.
example/01_gemm/run_gemm_example_streamk_v2.inc
View file @
22329520
...
@@ -116,27 +116,38 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
...
@@ -116,27 +116,38 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
};
};
auto
f_get_default_stride
=
auto
f_get_default_stride
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size
_t
stride
,
auto
layout
)
{
[](
std
::
size_t
row
,
std
::
size_t
col
,
ck
::
index
_t
stride
,
auto
layout
)
{
if
(
stride
==
-
1
)
if
(
stride
==
-
1
)
{
{
// give a chance if stride is
zero
, return a default packed stride
// give a chance if stride is
-1
, return a default packed stride
if
constexpr
(
std
::
is_same_v
<
decltype
(
layout
),
ck
::
tensor_layout
::
gemm
::
RowMajor
>
)
if
constexpr
(
std
::
is_same_v
<
decltype
(
layout
),
ck
::
tensor_layout
::
gemm
::
RowMajor
>
)
{
{
return
col
;
return
static_cast
<
std
::
size_t
>
(
col
)
;
}
}
else
else
{
{
return
row
;
return
static_cast
<
std
::
size_t
>
(
row
)
;
}
}
}
}
else
else
return
stride
;
return
static_cast
<
std
::
size_t
>
(
stride
)
;
};
};
auto
f_get_default_streamk_policy
=
[](
ck
::
index_t
streamk_sel
)
{
if
(
streamk_sel
==
-
1
)
{
return
static_cast
<
std
::
size_t
>
(
4
);
}
else
return
static_cast
<
std
::
size_t
>
(
streamk_sel
);
};
StrideA
=
f_get_default_stride
(
M
,
K
,
StrideA
,
ALayout
{});
StrideA
=
f_get_default_stride
(
M
,
K
,
StrideA
,
ALayout
{});
StrideB
=
f_get_default_stride
(
K
,
N
,
StrideB
,
BLayout
{});
StrideB
=
f_get_default_stride
(
K
,
N
,
StrideB
,
BLayout
{});
StrideC
=
f_get_default_stride
(
M
,
N
,
StrideC
,
CLayout
{});
StrideC
=
f_get_default_stride
(
M
,
N
,
StrideC
,
CLayout
{});
Streamk_sel
=
f_get_default_streamk_policy
(
Streamk_sel
);
Tensor
<
ADataType
>
a_m_k
(
f_host_tensor_descriptor
(
M
,
K
,
StrideA
,
ALayout
{}));
Tensor
<
ADataType
>
a_m_k
(
f_host_tensor_descriptor
(
M
,
K
,
StrideA
,
ALayout
{}));
Tensor
<
BDataType
>
b_k_n
(
f_host_tensor_descriptor
(
K
,
N
,
StrideB
,
BLayout
{}));
Tensor
<
BDataType
>
b_k_n
(
f_host_tensor_descriptor
(
K
,
N
,
StrideB
,
BLayout
{}));
...
...
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
View file @
22329520
...
@@ -1517,30 +1517,6 @@ struct BlockToCTileMap_GemmStreamK_v2
...
@@ -1517,30 +1517,6 @@ struct BlockToCTileMap_GemmStreamK_v2
equiv_tiles_big
=
MDiv
(
upper_big
/
k_iters_per_tile
.
get
());
equiv_tiles_big
=
MDiv
(
upper_big
/
k_iters_per_tile
.
get
());
equiv_tiles_little
=
MDiv
(
upper_little
/
k_iters_per_tile
.
get
());
equiv_tiles_little
=
MDiv
(
upper_little
/
k_iters_per_tile
.
get
());
}
}
if
(
ck
::
EnvIsEnabled
(
CK_ENV
(
CK_LOGGING
)))
{
printf
(
"streamk_sel=%0d,grid_size=%0d, num_tiles:%d, dp_tiles:%d, sk_tiles:%u, "
"sk_num_blocks:%d,dp_num_blocks:%d,sk_num_big_blocks:%d, "
"sk_total_iters:%d, dp_start_block_idx:%d, "
"k_iters_per_tile:%d, k_iters_per_big_block:%d, reduction_start_block_idx:%u, "
" workspace(acc float):%u
\n
"
,
streamk_sel
,
grid_size
,
num_tiles
,
dp_tiles
,
get_sk_tiles
(),
sk_num_blocks
,
dp_num_blocks
,
sk_num_big_blocks
,
sk_total_iters
,
dp_start_block_idx
,
k_iters_per_tile
.
get
(),
k_iters_per_big_block
,
reduction_start_block_idx
,
get_workspace_size
(
sizeof
(
float
)));
}
}
}
__host__
__device__
static
constexpr
index_t
CalculateGridSize
(
index_t
M
,
index_t
N
)
__host__
__device__
static
constexpr
index_t
CalculateGridSize
(
index_t
M
,
index_t
N
)
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_streamk_v3.hpp
View file @
22329520
...
@@ -1315,7 +1315,7 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
...
@@ -1315,7 +1315,7 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
num_k_block_main_loop
=
__builtin_amdgcn_readfirstlane
(
num_k_block_main_loop
=
__builtin_amdgcn_readfirstlane
(
(
a_grid_desc_ak0_m_ak1
.
GetLength
(
I0
)
*
a_grid_desc_ak0_m_ak1
.
GetLength
(
I2
))
/
(
a_grid_desc_ak0_m_ak1
.
GetLength
(
I0
)
*
a_grid_desc_ak0_m_ak1
.
GetLength
(
I2
))
/
KPerBlock
);
:
AK0
*
KPadded
/
KPerBlock
KPerBlock
);
blockwise_gemm_pipeline
.
template
Run
<
HasMainKBlockLoop
,
TailNum
>(
blockwise_gemm_pipeline
.
template
Run
<
HasMainKBlockLoop
,
TailNum
>(
a_grid_desc_ak0_m_ak1
,
a_grid_desc_ak0_m_ak1
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment