Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
888317e6
Unverified
Commit
888317e6
authored
Jan 04, 2025
by
Bartłomiej Kocot
Committed by
GitHub
Jan 04, 2025
Browse files
Fix universal gemm profiler for pk_i4_t (#1790)
* Fix universal gemm profiler for pk_i4_t * fix
parent
37b35146
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
6 deletions
+28
-6
include/ck/library/utility/host_tensor.hpp
include/ck/library/utility/host_tensor.hpp
+11
-2
include/ck/utility/type_convert.hpp
include/ck/utility/type_convert.hpp
+14
-1
profiler/include/profiler/profile_gemm_universal_impl.hpp
profiler/include/profiler/profile_gemm_universal_impl.hpp
+3
-3
No files found.
include/ck/library/utility/host_tensor.hpp
View file @
888317e6
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
5
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -44,10 +44,19 @@ std::ostream& LogRangeAsType(std::ostream& os, Range&& range, std::string delim)
else
os
<<
delim
;
if
constexpr
(
std
::
is_same_v
<
T
,
ck
::
f8_t
>
||
std
::
is_same_v
<
T
,
ck
::
bf8_t
>
)
using
RangeType
=
ck
::
remove_cvref_t
<
decltype
(
v
)
>
;
if
constexpr
(
std
::
is_same_v
<
RangeType
,
ck
::
f8_t
>
||
std
::
is_same_v
<
RangeType
,
ck
::
bf8_t
>
||
std
::
is_same_v
<
RangeType
,
ck
::
bhalf_t
>
)
{
os
<<
ck
::
type_convert
<
float
>
(
v
);
}
else
if
constexpr
(
std
::
is_same_v
<
RangeType
,
ck
::
pk_i4_t
>
)
{
const
auto
packed_floats
=
ck
::
type_convert
<
ck
::
float2_t
>
(
v
);
const
ck
::
vector_type
<
float
,
2
>
vector_of_floats
{
packed_floats
};
os
<<
vector_of_floats
.
template
AsType
<
float
>()[
ck
::
Number
<
0
>
{}]
<<
delim
<<
vector_of_floats
.
template
AsType
<
float
>()[
ck
::
Number
<
1
>
{}];
}
else
{
os
<<
static_cast
<
T
>
(
v
);
...
...
include/ck/utility/type_convert.hpp
View file @
888317e6
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
5
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -465,6 +465,19 @@ inline __host__ __device__ float2_t type_convert<float2_t, f8x2_ocp_t>(f8x2_ocp_
#endif
}
// Unpacks a pk_i4_t (one byte holding two 4-bit fields) into a float2_t.
//
// The byte is reinterpreted as uint8_t, split into its low and high nibbles,
// and each nibble is converted to float on its own. The result holds the low
// nibble first and the high nibble second.
//
// NOTE(review): both nibbles are treated as unsigned values in [0, 15]; no
// sign extension or offset is applied here. Confirm that this matches the
// encoding expected by callers.
template <>
inline __host__ __device__ float2_t type_convert<float2_t, pk_i4_t>(pk_i4_t x)
{
    const uint8_t packed = ck::bit_cast<uint8_t>(x);

    // Bits [3:0] and bits [7:4] of the packed byte, respectively.
    const uint8_t low_nibble  = static_cast<uint8_t>(packed & 0x0f);
    const uint8_t high_nibble = static_cast<uint8_t>(packed >> 4);

    const float low_as_float  = ck::type_convert<float>(low_nibble);
    const float high_as_float = ck::type_convert<float>(high_nibble);

    return {low_as_float, high_as_float};
}
template
<
>
inline
__host__
__device__
half2_t
type_convert
<
half2_t
,
float2_t
>
(
float2_t
x
)
{
...
...
profiler/include/profiler/profile_gemm_universal_impl.hpp
View file @
888317e6
// SPDX-License-Identifier: MIT
// Copyright (c) 2023-202
4
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2023-202
5
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -177,7 +177,7 @@ bool profile_gemm_universal_impl(int do_verification,
}
}
if
(
is_same_v
<
BDataType
,
pk_i4_t
>
&&
is_same_v
<
ADataType
,
half_t
>
)
if
constexpr
(
is_same_v
<
BDataType
,
pk_i4_t
>
&&
is_same_v
<
ADataType
,
half_t
>
)
{
// vector pk_i4x4 permute
for
(
int
i
=
0
;
i
<
N
;
i
++
)
...
...
@@ -188,7 +188,7 @@ bool profile_gemm_universal_impl(int do_verification,
for
(
int
k
=
0
;
k
<
4
;
k
++
)
{
int
i4x2
=
b_k_n_permute
(
j
+
k
*
2
,
i
);
int
i4x2
=
b_k_n_permute
(
j
+
k
*
2
,
i
)
.
data
;
input
[
k
*
2
+
0
]
=
(
i4x2
>>
4
)
&
0xf
;
input
[
k
*
2
+
1
]
=
(
i4x2
>>
0
)
&
0xf
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment