Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
670ce6b9
"vscode:/vscode.git/clone" did not exist on "b7184e9db577a4ba391f5c33eb0a3f4e147204e1"
Commit
670ce6b9
authored
May 04, 2023
by
Po-Yen, Chen
Browse files
Make 3rd type parameter optional
parent
b968fd17
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
15 deletions
+34
-15
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
+34
-15
No files found.
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
View file @
670ce6b9
...
@@ -109,8 +109,11 @@ struct BlockToCTileMap_M00_N0_M01
...
@@ -109,8 +109,11 @@ struct BlockToCTileMap_M00_N0_M01
// Rows of column-vectors
// Rows of column-vectors
// This C-tile map dynamically adjusts M01 when C-tile index is out of range
// This C-tile map dynamically adjusts M01 when C-tile index is out of range
template
<
index_t
MPerBlock
,
index_t
NPerBlock
,
typename
CGridDesc_M_N
>
template
<
index_t
MPerBlock
,
index_t
NPerBlock
,
typename
CGridDesc_M_N
=
void
>
struct
BlockToCTileMap_M00_N0_M01Adapt
struct
BlockToCTileMap_M00_N0_M01Adapt
;
template
<
index_t
MPerBlock
,
index_t
NPerBlock
>
struct
BlockToCTileMap_M00_N0_M01Adapt
<
MPerBlock
,
NPerBlock
,
void
>
{
{
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
auto
I0
=
Number
<
0
>
{};
static
constexpr
auto
I1
=
Number
<
1
>
{};
static
constexpr
auto
I1
=
Number
<
1
>
{};
...
@@ -119,9 +122,8 @@ struct BlockToCTileMap_M00_N0_M01Adapt
...
@@ -119,9 +122,8 @@ struct BlockToCTileMap_M00_N0_M01Adapt
__host__
__device__
BlockToCTileMap_M00_N0_M01Adapt
()
=
default
;
__host__
__device__
BlockToCTileMap_M00_N0_M01Adapt
()
=
default
;
__host__
__device__
BlockToCTileMap_M00_N0_M01Adapt
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
,
__host__
__device__
BlockToCTileMap_M00_N0_M01Adapt
(
index_t
M
,
index_t
N
,
index_t
M01
=
8
)
index_t
M01
=
8
)
:
M_
(
M
),
N_
(
N
),
M01_
(
M01
)
:
M01_
(
M01
),
c_grid_desc_m_n_
(
c_grid_desc_m_n
)
{
{
}
}
...
@@ -133,18 +135,13 @@ struct BlockToCTileMap_M00_N0_M01Adapt
...
@@ -133,18 +135,13 @@ struct BlockToCTileMap_M00_N0_M01Adapt
return
M0
*
N0
;
return
M0
*
N0
;
}
}
__host__
constexpr
index_t
CalculateGridSize
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
)
const
{
return
CalculateGridSize
(
c_grid_desc_m_n
.
GetLength
(
I0
),
c_grid_desc_m_n
.
GetLength
(
I1
));
}
template
<
typename
TopIdx
>
template
<
typename
TopIdx
>
__host__
__device__
constexpr
auto
CalculateBottomIndex
(
const
TopIdx
&
idx_top
)
const
__host__
__device__
constexpr
auto
CalculateBottomIndex
(
const
TopIdx
&
idx_top
)
const
{
{
auto
block_1d_id
=
idx_top
[
I0
];
auto
block_1d_id
=
idx_top
[
I0
];
const
auto
M0
=
math
::
integer_divide_ceil
(
c_grid_desc_m_n_
.
GetLength
(
I0
)
,
MPerBlock
);
const
auto
M0
=
math
::
integer_divide_ceil
(
M_
,
MPerBlock
);
const
auto
N0
=
math
::
integer_divide_ceil
(
c_grid_desc_m_n_
.
GetLength
(
I1
)
,
NPerBlock
);
const
auto
N0
=
math
::
integer_divide_ceil
(
N_
,
NPerBlock
);
block_1d_id
=
block_1d_id
%
(
M0
*
N0
);
// swallow batch index
block_1d_id
=
block_1d_id
%
(
M0
*
N0
);
// swallow batch index
...
@@ -212,11 +209,33 @@ struct BlockToCTileMap_M00_N0_M01Adapt
...
@@ -212,11 +209,33 @@ struct BlockToCTileMap_M00_N0_M01Adapt
return
true
;
// always valid provided that user gets grid size from CalculateGridSize()
return
true
;
// always valid provided that user gets grid size from CalculateGridSize()
}
}
__host__
bool
CheckValidity
(
const
CGridDesc_M_N
&
/* c_grid_desc_m_n */
)
const
{
return
true
;
}
private:
private:
index_t
M_
;
index_t
N_
;
index_t
M01_
;
index_t
M01_
;
CGridDesc_M_N
c_grid_desc_m_n_
;
};
template
<
index_t
MPerBlock
,
index_t
NPerBlock
,
typename
CGridDesc_M_N
>
struct
BlockToCTileMap_M00_N0_M01Adapt
:
BlockToCTileMap_M00_N0_M01Adapt
<
MPerBlock
,
NPerBlock
,
void
>
{
using
Parent
=
BlockToCTileMap_M00_N0_M01Adapt
<
MPerBlock
,
NPerBlock
,
void
>
;
using
Parent
::
I0
;
using
Parent
::
I1
;
__host__
__device__
BlockToCTileMap_M00_N0_M01Adapt
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
,
index_t
M01
=
8
)
:
Parent
(
c_grid_desc_m_n
.
GetLength
(
I0
),
c_grid_desc_m_n
.
GetLength
(
I1
),
M01
)
{
}
__host__
constexpr
index_t
CalculateGridSize
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
)
const
{
return
Parent
::
CalculateGridSize
(
c_grid_desc_m_n
.
GetLength
(
I0
),
c_grid_desc_m_n
.
GetLength
(
I1
));
}
__host__
bool
CheckValidity
(
const
CGridDesc_M_N
&
/* c_grid_desc_m_n */
)
const
{
return
true
;
}
};
};
// 2D slices of column-vectors in 3D space
// 2D slices of column-vectors in 3D space
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment