Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7e2b9da3
Commit
7e2b9da3
authored
Oct 16, 2023
by
Adam Osewski
Browse files
B2E map calculation of workspace size
parent
98220c32
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
2 deletions
+17
-2
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
+17
-2
No files found.
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
View file @
7e2b9da3
...
@@ -1176,7 +1176,7 @@ struct BlockToCTileMap_LinearKSplit
...
@@ -1176,7 +1176,7 @@ struct BlockToCTileMap_LinearKSplit
{
{
}
}
__host__
__device__
constexpr
index_t
CalculateGridSize
(
index_t
M
,
index_t
N
)
__host__
__device__
constexpr
index_t
CalculateGridSize
(
index_t
M
,
index_t
N
)
const
{
{
const
auto
M0
=
math
::
integer_divide_ceil
(
M
,
MPerBlock
);
const
auto
M0
=
math
::
integer_divide_ceil
(
M
,
MPerBlock
);
const
auto
N0
=
math
::
integer_divide_ceil
(
N
,
NPerBlock
);
const
auto
N0
=
math
::
integer_divide_ceil
(
N
,
NPerBlock
);
...
@@ -1185,7 +1185,8 @@ struct BlockToCTileMap_LinearKSplit
...
@@ -1185,7 +1185,8 @@ struct BlockToCTileMap_LinearKSplit
}
}
template
<
typename
CGridDesc_M_N
>
template
<
typename
CGridDesc_M_N
>
__host__
__device__
constexpr
index_t
CalculateGridSize
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
)
__host__
__device__
constexpr
index_t
CalculateGridSize
(
const
CGridDesc_M_N
&
c_grid_desc_m_n
)
const
{
{
return
CalculateGridSize
(
c_grid_desc_m_n
.
GetLength
(
I0
),
c_grid_desc_m_n
.
GetLength
(
I1
));
return
CalculateGridSize
(
c_grid_desc_m_n
.
GetLength
(
I0
),
c_grid_desc_m_n
.
GetLength
(
I1
));
}
}
...
@@ -1267,6 +1268,20 @@ struct BlockToCTileMap_LinearKSplit
...
@@ -1267,6 +1268,20 @@ struct BlockToCTileMap_LinearKSplit
__host__
__device__
index_t
GetTileNIdx
()
const
{
return
N0_idx_
;
}
__host__
__device__
index_t
GetTileNIdx
()
const
{
return
N0_idx_
;
}
__host__
__device__
index_t
GetTileKIdx
()
const
{
return
K0_idx_
;
}
__host__
__device__
index_t
GetTileKIdx
()
const
{
return
K0_idx_
;
}
static
__host__
uint32_t
GetAccWorkspaceSize
(
uint32_t
acc_element_bytes
,
uint32_t
grid_size
)
{
static
constexpr
uint32_t
alignment
=
128
;
uint32_t
acc_buffer_bytes
=
MPerBlock
*
NPerBlock
*
grid_size
*
acc_element_bytes
;
return
(
acc_buffer_bytes
+
alignment
-
1
)
/
alignment
*
alignment
;
}
static
__device__
uint32_t
GetAccWorkspaceSize
(
uint32_t
acc_element_bytes
)
{
static
constexpr
uint32_t
alignment
=
128
;
uint32_t
acc_buffer_bytes
=
MPerBlock
*
NPerBlock
*
get_grid_size
()
*
acc_element_bytes
;
return
(
acc_buffer_bytes
+
alignment
-
1
)
/
alignment
*
alignment
;
}
private:
private:
index_t
M_
;
index_t
M_
;
index_t
N_
;
index_t
N_
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment