Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f8cbbd1b
Commit
f8cbbd1b
authored
Dec 01, 2023
by
Adam Osewski
Browse files
Change return type from index_t to uint32_t for GetFlagValue.
Update doc.
parent
f41a265a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
15 deletions
+17
-15
include/ck/utility/work_scheduling.hpp
include/ck/utility/work_scheduling.hpp
+17
-15
No files found.
include/ck/utility/work_scheduling.hpp
View file @
f8cbbd1b
...
@@ -65,18 +65,19 @@ class StridedReductionTileLoop
...
@@ -65,18 +65,19 @@ class StridedReductionTileLoop
/// @brief Calculate this workgroup flag index.
/// @brief Calculate this workgroup flag index.
///
///
/// @note This scheduler intentionally does not have flag index as its member, since
/// @note This scheduler intentionally does not have flag index as its member, since
///
the number of `k_tiles` may change when iterating (i.e. in grouped gemm,
///
current workgroup may process tiles across different MN-output tiles or
/// different
groups may have different `k_tiles` in K dimension
).
///
across
different
GEMMs (grouped gemm
).
///
///
/// @param[in] k_tiles The number of data tiles in the reduced dimension.
/// @param[in] k_tiles The number of data tiles in the reduced dimension.
/// @param[in] output_tile_idx The output (MN) tile index (of current GEMM).
/// @param[in] output_tile_idx The output (MN) linear tile index (of current GEMM).
/// @param[in] output_tile_idx_offset The output tile index offset.
/// @param[in] output_tile_idx_offset The accumulated offset of output tiles from previous
/// GEMMs.
///
///
/// @return The workgroup flag index.
/// @return The workgroup flag index.
///
///
__device__
in
dex
_t
GetWorkgroupFlagIdx
(
index_t
k_tiles
,
__device__
u
in
t32
_t
GetWorkgroupFlagIdx
(
index_t
k_tiles
,
index_t
output_tile_idx
,
index_t
output_tile_idx
,
index_t
output_tile_idx_offset
)
const
index_t
output_tile_idx_offset
)
const
{
{
return
(
output_tile_idx
+
output_tile_idx_offset
)
%
GetFlagCount
(
k_tiles
);
return
(
output_tile_idx
+
output_tile_idx_offset
)
%
GetFlagCount
(
k_tiles
);
}
}
...
@@ -91,8 +92,9 @@ class StridedReductionTileLoop
...
@@ -91,8 +92,9 @@ class StridedReductionTileLoop
__device__
void
__device__
void
FlagFinished
(
index_t
k_tiles
,
index_t
output_tile_idx
,
index_t
output_tile_idx_offset
)
FlagFinished
(
index_t
k_tiles
,
index_t
output_tile_idx
,
index_t
output_tile_idx_offset
)
{
{
finished_block_flags_
.
inc
(
const
auto
fidx
=
GetWorkgroupFlagIdx
(
k_tiles
,
output_tile_idx
,
output_tile_idx_offset
);
GetWorkgroupFlagIdx
(
k_tiles
,
output_tile_idx
,
output_tile_idx_offset
));
finished_block_flags_
.
inc
(
fidx
);
}
}
///
///
...
@@ -149,12 +151,12 @@ class StridedReductionTileLoop
...
@@ -149,12 +151,12 @@ class StridedReductionTileLoop
/// @param[in] output_tile_idx The output (MN) tile index.
/// @param[in] output_tile_idx The output (MN) tile index.
/// @param[in] output_tile_idx_offset The output tile index offset.
/// @param[in] output_tile_idx_offset The output tile index offset.
///
///
__device__
in
dex
_t
GetFlagValue
(
index_t
k_tiles
,
__device__
u
in
t32
_t
GetFlagValue
(
index_t
k_tiles
,
index_t
output_tile_idx
,
index_t
output_tile_idx
,
index_t
output_tile_idx_offset
)
const
index_t
output_tile_idx_offset
)
const
{
{
return
static_cast
<
index_t
>
(
finished_block_flags_
.
ld
(
return
finished_block_flags_
.
ld
(
GetWorkgroupFlagIdx
(
k_tiles
,
output_tile_idx
,
output_tile_idx_offset
))
)
;
GetWorkgroupFlagIdx
(
k_tiles
,
output_tile_idx
,
output_tile_idx_offset
));
}
}
const
index_t
tile_count_
;
const
index_t
tile_count_
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment