Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0cd587d9
Commit
0cd587d9
authored
Dec 01, 2022
by
aska-0096
Browse files
workaround for incorrect HIP warpSize return value
parent
9adf2e60
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
3 deletions
+26
-3
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
+1
-1
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
+2
-2
include/ck/utility/common_header.hpp
include/ck/utility/common_header.hpp
+23
-0
No files found.
include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp
View file @
0cd587d9
...
@@ -315,7 +315,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
...
@@ -315,7 +315,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
using
DefaultBlock2CTileMap
=
using
DefaultBlock2CTileMap
=
remove_cvref_t
<
decltype
(
MakeDefaultBlock2CTileMap
(
CGridDesc_M_N
{},
1
,
1
))
>
;
remove_cvref_t
<
decltype
(
MakeDefaultBlock2CTileMap
(
CGridDesc_M_N
{},
1
,
1
))
>
;
template
<
bool
HasMainKBlockLoop
,
typename
Block2CTileMap
>
template
<
bool
HasMainKBlockLoop
,
typename
Block2CTileMap
=
DefaultBlock2CTileMap
>
__device__
static
void
__device__
static
void
Run
(
const
FloatAB
*
__restrict__
p_a_grid
,
Run
(
const
FloatAB
*
__restrict__
p_a_grid
,
const
FloatAB
*
__restrict__
p_b_grid
,
const
FloatAB
*
__restrict__
p_b_grid
,
...
...
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
View file @
0cd587d9
...
@@ -158,8 +158,8 @@ struct WmmaSelector
...
@@ -158,8 +158,8 @@ struct WmmaSelector
return
WmmaInstr
::
wmma_i32_16x16x16_iu4
;
return
WmmaInstr
::
wmma_i32_16x16x16_iu4
;
}
}
#endif
#endif
// get_warp_size() does not return the correct wave size; hardcode it to 32 as a workaround
static
constexpr
auto
selected_wmma
=
wmma_type
<
GetWmma
<
src_type
,
dst_type
,
MPerWmma
,
NPerWmma
>
(),
get_warp_size
()
>
{};
static
constexpr
auto
selected_wmma
=
wmma_type
<
GetWmma
<
src_type
,
dst_type
,
MPerWmma
,
NPerWmma
>
(),
Number
<
32
>
{}
>
{};
__host__
__device__
constexpr
WmmaSelector
()
__host__
__device__
constexpr
WmmaSelector
()
{
{
...
...
include/ck/utility/common_header.hpp
View file @
0cd587d9
...
@@ -49,3 +49,26 @@
...
@@ -49,3 +49,26 @@
#ifdef CK_USE_AMD_MFMA
#ifdef CK_USE_AMD_MFMA
#include "ck/utility/amd_xdlops.hpp"
#include "ck/utility/amd_xdlops.hpp"
#endif
#endif
#include <string_view>
// Returns a string view spelling the template argument T.
//
// The text is carved out of the compiler-provided function-signature string
// (__PRETTY_FUNCTION__ on Clang and GCC, __FUNCSIG__ on MSVC) by removing the
// fixed text each compiler prints before and after the type.
//
// NOTE: the prefix literals below mirror how each compiler prints THIS exact
// declaration ("constexpr auto type_name()"). Renaming the function or
// spelling its return type differently requires updating them.
//
// Return value:
//   - the type's spelling, when the signature carries the expected decorations;
//   - the whole, untrimmed signature when it does not (a compiler front end
//     that formats signatures differently), instead of a mis-trimmed string;
//   - an empty view on compilers matched by none of the branches below.
template <typename T>
constexpr auto type_name()
{
    std::string_view name, prefix, suffix;
#ifdef __clang__
    name   = __PRETTY_FUNCTION__;
    prefix = "auto type_name() [T = ";
    suffix = "]";
#elif defined(__GNUC__)
    name   = __PRETTY_FUNCTION__;
    prefix = "constexpr auto type_name() [with T = ";
    suffix = "]";
#elif defined(_MSC_VER)
    name   = __FUNCSIG__;
    prefix = "auto __cdecl type_name<";
    suffix = ">(void)";
#endif

    // remove_prefix/remove_suffix require their argument to be <= size(), and
    // trimming is only meaningful if the decorations are really present, so
    // verify both before touching the view.
    const bool has_expected_decorations =
        name.size() >= prefix.size() + suffix.size() &&
        name.substr(0, prefix.size()) == prefix &&
        name.substr(name.size() - suffix.size()) == suffix;

    if(!has_expected_decorations)
    {
        // Unknown signature format: hand back the raw signature untouched.
        return name;
    }

    name.remove_prefix(prefix.size());
    name.remove_suffix(suffix.size());
    return name;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment