Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
bd5008af
Commit
bd5008af
authored
Jan 21, 2025
by
Adam Osewski
Browse files
Small refactoring + doc
parent
bee700b0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
25 additions
and
19 deletions
+25
-19
include/ck_tile/core/tensor/tile_window.hpp
include/ck_tile/core/tensor/tile_window.hpp
+20
-3
include/ck_tile/ops/gemm/block/block_universal_gemm_as_bs_cr.hpp
.../ck_tile/ops/gemm/block/block_universal_gemm_as_bs_cr.hpp
+1
-1
include/ck_tile/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v1.hpp
...e/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v1.hpp
+4
-15
No files found.
include/ck_tile/core/tensor/tile_window.hpp
View file @
bd5008af
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
5
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
...
@@ -18,8 +18,17 @@
...
@@ -18,8 +18,17 @@
namespace
ck_tile
{
namespace
ck_tile
{
// Note: this tile window do not support single issue
/**
// you need to use tile_window_linear structure for this purpose
* @brief This class provides a tile (windowed) view of, and access to, device memory.
*
* @note This tile window does not support single issue; use the tile_window_linear
* structure for that purpose.
*
* @tparam BottomTensorView_ Class describing & holding device tensor memory.
* @tparam WindowLengths_ Spatial sizes of windowed view on tensor.
* @tparam StaticTileDistribution_ Thread distribution (mapping) into Tile dimensions
* @tparam NumCoord TBD
*/
template
<
typename
BottomTensorView_
,
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
,
typename
WindowLengths_
,
typename
StaticTileDistribution_
,
typename
StaticTileDistribution_
,
...
@@ -1009,6 +1018,14 @@ CK_TILE_DEVICE void move_tile_window(
...
@@ -1009,6 +1018,14 @@ CK_TILE_DEVICE void move_tile_window(
window
.
move
(
step
);
window
.
move
(
step
);
}
}
/**
* @brief This class provides a description of a tile-windowed view on device memory.
*
* @note This class does not provide any functions to read or modify device memory.
*
* @tparam BottomTensorView_ Class describing & holding device tensor memory.
* @tparam WindowLengths_ Spatial sizes of windowed view on tensor.
*/
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
>
template
<
typename
BottomTensorView_
,
typename
WindowLengths_
>
struct
tile_window_with_static_lengths
struct
tile_window_with_static_lengths
{
{
...
...
include/ck_tile/ops/gemm/block/block_universal_gemm_as_bs_cr.hpp
View file @
bd5008af
...
@@ -80,7 +80,7 @@ struct BlockUniversalGemmAsBsCr
...
@@ -80,7 +80,7 @@ struct BlockUniversalGemmAsBsCr
static
constexpr
index_t
InterWaveSchedulingMacClusters
=
1
;
static
constexpr
index_t
InterWaveSchedulingMacClusters
=
1
;
static
constexpr
index_t
KPack
=
WarpGemm
::
kKPerThread
;
static
constexpr
index_t
KPack
=
WarpGemm
::
kKPerThread
;
static
constexpr
index_t
KPerThread
=
K
PerBlock
/
WarpGemm
::
kK
*
KPack
;
static
constexpr
index_t
KPerThread
=
K
IterPerWarp
*
KPack
;
static
constexpr
index_t
KRepeat
=
KPerThread
/
KPack
;
static
constexpr
index_t
KRepeat
=
KPerThread
/
KPack
;
};
};
...
...
include/ck_tile/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v1.hpp
View file @
bd5008af
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
5
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
...
@@ -29,25 +29,14 @@ struct GemmPipelineAGmemBGmemCRegV1
...
@@ -29,25 +29,14 @@ struct GemmPipelineAGmemBGmemCRegV1
static
constexpr
index_t
kNPerBlock
=
BlockGemmShape
::
kN
;
static
constexpr
index_t
kNPerBlock
=
BlockGemmShape
::
kN
;
static
constexpr
index_t
kKPerBlock
=
BlockGemmShape
::
kK
;
static
constexpr
index_t
kKPerBlock
=
BlockGemmShape
::
kK
;
static
constexpr
index_t
VectorSizeA
=
P
roblem
::
VectorSizeA
;
static
constexpr
index_t
VectorSizeA
=
P
olicy
::
template
GetVectorSizeA
<
Problem
>()
;
static
constexpr
index_t
VectorSizeB
=
P
roblem
::
VectorSizeB
;
static
constexpr
index_t
VectorSizeB
=
P
olicy
::
template
GetVectorSizeB
<
Problem
>()
;
static
constexpr
index_t
VectorSizeC
=
P
roblem
::
VectorSizeC
;
static
constexpr
index_t
VectorSizeC
=
P
olicy
::
template
GetVectorSizeC
<
Problem
>()
;
static
constexpr
bool
kPadM
=
Problem
::
kPadM
;
static
constexpr
bool
kPadM
=
Problem
::
kPadM
;
static
constexpr
bool
kPadN
=
Problem
::
kPadN
;
static
constexpr
bool
kPadN
=
Problem
::
kPadN
;
static
constexpr
bool
kPadK
=
Problem
::
kPadK
;
static
constexpr
bool
kPadK
=
Problem
::
kPadK
;
CK_TILE_HOST_DEVICE
static
constexpr
index_t
GetStaticLdsSize
()
{
return
integer_divide_ceil
(
sizeof
(
ADataType
)
*
Policy
::
template
MakeALdsBlockDescriptor
<
Problem
>().
get_element_space_size
(),
16
)
*
16
+
sizeof
(
BDataType
)
*
Policy
::
template
MakeBLdsBlockDescriptor
<
Problem
>().
get_element_space_size
();
}
CK_TILE_HOST_DEVICE
static
constexpr
index_t
GetSmemSize
()
CK_TILE_HOST_DEVICE
static
constexpr
index_t
GetSmemSize
()
{
{
return
Policy
::
template
GetSmemSize
<
Problem
>();
return
Policy
::
template
GetSmemSize
<
Problem
>();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment