Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
3f032f25
Commit
3f032f25
authored
Jun 02, 2022
by
Anthony Chang
Browse files
compile time M01
parent
132cd364
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
7 deletions
+5
-7
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
+3
-5
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp
+1
-1
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
+1
-1
No files found.
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
View file @
3f032f25
...
...
@@ -107,7 +107,7 @@ struct BlockToCTileMap_M00_N0_M01
// Rows of column-vectors
// This C-tile map dynamically adjusts M01 when C-tile index is out of range
template
<
index_t
MPerBlock
,
index_t
NPerBlock
,
typename
CGridDesc_M_N
>
template
<
index_t
MPerBlock
,
index_t
NPerBlock
,
typename
CGridDesc_M_N
,
index_t
M01
=
1
>
struct
BlockToCTileMap_M00_N0_M01Adapt
{
static
constexpr
auto
I0
=
Number
<
0
>
{};
...
...
@@ -117,8 +117,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt
__host__
__device__
BlockToCTileMap_M00_N0_M01Adapt
()
=
default
;
__host__
__device__
BlockToCTileMap_M00_N0_M01Adapt
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
,
index_t
M01
=
1
)
__host__
__device__
BlockToCTileMap_M00_N0_M01Adapt
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
)
:
M01_
(
M01
),
c_grid_desc_m_n_
(
c_grid_desc_m_n
)
{
}
...
...
@@ -172,7 +171,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt
// 2D slices of column-vectors in 3D space
// This C-tile map dynamically adjusts M01 when C-tile index is out of range
template
<
index_t
MPerBlock
,
index_t
NPerBlock
,
typename
CGridDesc_M_N
>
template
<
index_t
MPerBlock
,
index_t
NPerBlock
,
typename
CGridDesc_M_N
,
index_t
M01
=
1
>
struct
BlockToCTileMap_KSplit_M00_N0_M01Adapt
{
static
constexpr
auto
I0
=
Number
<
0
>
{};
...
...
@@ -183,7 +182,6 @@ struct BlockToCTileMap_KSplit_M00_N0_M01Adapt
__host__
__device__
BlockToCTileMap_KSplit_M00_N0_M01Adapt
()
=
default
;
__host__
__device__
BlockToCTileMap_KSplit_M00_N0_M01Adapt
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
,
index_t
M01
=
8
,
index_t
KSplit
=
1
)
:
M01_
(
M01
),
KSplit_
(
KSplit
),
c_grid_desc_m_n_
(
c_grid_desc_m_n
)
{
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp
View file @
3f032f25
...
...
@@ -268,7 +268,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4
const
CMNGridDesc
&
c_m_n_grid_desc
,
index_t
/* M01 */
,
index_t
/* N01 */
,
index_t
KBatch
)
{
return
BlockToCTileMap_KSplit_M00_N0_M01Adapt
<
MPerBlock
,
NPerBlock
,
CMNGridDesc
>
(
c_m_n_grid_desc
,
1
,
KBatch
);
c_m_n_grid_desc
,
KBatch
);
}
using
CM0N0M1N1M2M3M4N2GridDesc
=
decltype
(
MakeCM0N0M1N1M2M3M4N2GridDescriptor
(
CMNGridDesc
{}));
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp
View file @
3f032f25
...
...
@@ -242,7 +242,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
const
CMNGridDesc
&
c_m_n_grid_desc
,
index_t
/* M01 */
,
index_t
/* N01 */
,
index_t
KBatch
)
{
return
BlockToCTileMap_KSplit_M00_N0_M01Adapt
<
MPerBlock
,
NPerBlock
,
CMNGridDesc
>
(
c_m_n_grid_desc
,
1
,
KBatch
);
c_m_n_grid_desc
,
KBatch
);
}
__host__
__device__
static
constexpr
auto
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment