Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
70974baf
Commit
70974baf
authored
Oct 05, 2023
by
Adam Osewski
Browse files
Change how IsFirstKSplitBlock is implemented
parent
6063db7d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
10 deletions
+13
-10
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
+13
-10
No files found.
include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp
View file @
70974baf
...
@@ -1203,8 +1203,9 @@ struct BlockToCTileMap_LinearKSplit
...
@@ -1203,8 +1203,9 @@ struct BlockToCTileMap_LinearKSplit
M0_idx_
=
block_1d_id
/
(
N0
*
KSplit_
);
M0_idx_
=
block_1d_id
/
(
N0
*
KSplit_
);
block_1d_id
=
block_1d_id
%
(
N0
*
KSplit_
);
block_1d_id
=
block_1d_id
%
(
N0
*
KSplit_
);
N0_idx_
=
block_1d_id
/
KSplit_
;
N0_idx_
=
block_1d_id
/
KSplit_
;
K0_idx_
=
block_1d_id
%
KSplit_
;
K0_idx_
=
block_1d_id
%
KSplit_
;
is_first_k_split_block_
=
K0_idx_
==
0
;
return
make_tuple
(
M0_idx_
,
N0_idx_
,
K0_idx_
);
return
make_tuple
(
M0_idx_
,
N0_idx_
,
K0_idx_
);
}
}
...
@@ -1215,8 +1216,9 @@ struct BlockToCTileMap_LinearKSplit
...
@@ -1215,8 +1216,9 @@ struct BlockToCTileMap_LinearKSplit
M0_idx_
=
__builtin_amdgcn_readfirstlane
(
block_1d_id
/
(
N0
*
KSplit_
));
M0_idx_
=
__builtin_amdgcn_readfirstlane
(
block_1d_id
/
(
N0
*
KSplit_
));
block_1d_id
=
block_1d_id
%
(
N0
*
KSplit_
);
block_1d_id
=
block_1d_id
%
(
N0
*
KSplit_
);
N0_idx_
=
__builtin_amdgcn_readfirstlane
(
block_1d_id
/
KSplit_
);
N0_idx_
=
__builtin_amdgcn_readfirstlane
(
block_1d_id
/
KSplit_
);
K0_idx_
=
__builtin_amdgcn_readfirstlane
(
block_1d_id
%
KSplit_
);
K0_idx_
=
__builtin_amdgcn_readfirstlane
(
block_1d_id
%
KSplit_
);
is_first_k_split_block_
=
K0_idx_
==
0
;
return
make_tuple
(
M0_idx_
,
N0_idx_
,
K0_idx_
);
return
make_tuple
(
M0_idx_
,
N0_idx_
,
K0_idx_
);
}
}
...
@@ -1225,6 +1227,11 @@ struct BlockToCTileMap_LinearKSplit
...
@@ -1225,6 +1227,11 @@ struct BlockToCTileMap_LinearKSplit
return
make_tuple
(
M0_idx_
,
N0_idx_
,
K0_idx_
);
return
make_tuple
(
M0_idx_
,
N0_idx_
,
K0_idx_
);
}
}
///
/// @brief Return linear output tile index.
///
/// @return The output tile index.
///
__host__
__device__
index_t
GetOutputTileIdx
()
const
__host__
__device__
index_t
GetOutputTileIdx
()
const
{
{
const
auto
N0
=
math
::
integer_divide_ceil
(
N_
,
NPerBlock
);
const
auto
N0
=
math
::
integer_divide_ceil
(
N_
,
NPerBlock
);
...
@@ -1252,14 +1259,9 @@ struct BlockToCTileMap_LinearKSplit
...
@@ -1252,14 +1259,9 @@ struct BlockToCTileMap_LinearKSplit
///
///
/// @brief Determines whether the current workgroup processed first tile in K dimension
/// @brief Determines whether the current workgroup processed first tile in K dimension
///
///
/// @param[in] tiles_per_block The number of tiles per block to process per workgroup.
///
/// @return True if the current workgroup processed first tile. False otherwise.
/// @return True if the current workgroup processed first tile. False otherwise.
///
///
__host__
__device__
bool
IsFirstKSplitBlock
(
index_t
tiles_per_block
)
const
__host__
__device__
bool
IsFirstKSplitBlock
()
const
{
return
is_first_k_split_block_
;
}
{
return
(
K0_idx_
+
1
-
tiles_per_block
)
<=
0
;
}
__host__
__device__
index_t
GetTileMIdx
()
const
{
return
M0_idx_
;
}
__host__
__device__
index_t
GetTileMIdx
()
const
{
return
M0_idx_
;
}
__host__
__device__
index_t
GetTileNIdx
()
const
{
return
N0_idx_
;
}
__host__
__device__
index_t
GetTileNIdx
()
const
{
return
N0_idx_
;
}
...
@@ -1272,6 +1274,7 @@ struct BlockToCTileMap_LinearKSplit
...
@@ -1272,6 +1274,7 @@ struct BlockToCTileMap_LinearKSplit
index_t
M0_idx_
;
index_t
M0_idx_
;
index_t
N0_idx_
;
index_t
N0_idx_
;
index_t
K0_idx_
;
index_t
K0_idx_
;
bool
is_first_k_split_block_
{
false
};
};
};
}
// namespace ck
}
// namespace ck
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment