Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
FlashMLA
Commits
3a477917
Commit
3a477917
authored
Feb 24, 2026
by
zhanghj2
Browse files
FLASH_MLA_BF16_TYPE控制bf16转换精度
parent
4c0bb04e
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
0 deletions
+17
-0
csrc/gfx9/decode/combine/combine.cu
csrc/gfx9/decode/combine/combine.cu
+5
-0
csrc/utils.h
csrc/utils.h
+7
-0
setup.py
setup.py
+5
-0
No files found.
csrc/gfx9/decode/combine/combine.cu
View file @
3a477917
...
...
@@ -167,7 +167,12 @@ flash_fwd_mla_combine_kernel(const CombineParams params) {
// }
// Converts an fp32 value to the raw 16-bit pattern of a bf16 value, i.e. the
// upper half of the fp32 encoding.
//
// FLASH_MLA_BF16_TYPE selects the rounding at compile time:
//   0 (default) - drop the low 16 bits (round toward zero);
//   1           - add half a bf16 ULP (0x8000) before dropping the low 16 bits.
auto float2bf16 = [](float s) -> uint16_t {
    uint32_t x32 = reinterpret_cast<uint32_t const &>(s);
#ifndef FLASH_MLA_BF16_TYPE
#define FLASH_MLA_BF16_TYPE 0
#endif
#if FLASH_MLA_BF16_TYPE == 1
    // Apply the rounding bias to finite inputs only. An all-ones exponent
    // field means Inf or NaN; adding 0x8000 to a NaN whose upper mantissa bits
    // are set (e.g. 0x7FFF8000) would carry through the exponent and sign and
    // turn the NaN into a zero.
    const bool is_finite = (x32 & 0x7F800000u) != 0x7F800000u;
    if (is_finite) {
        x32 += 0x8000u;
    }
#endif
    return uint16_t(x32 >> 16);
};
...
...
csrc/utils.h
View file @
3a477917
...
...
@@ -290,7 +290,14 @@ __forceinline__ __device__ auto convert_type(Tensor<Engine, Layout> const &tenso
#else
{
if
constexpr
(
std
::
is_same_v
<
To_type
,
cutlass
::
bfloat16_t
>
)
{
#ifndef FLASH_MLA_BF16_TYPE
#define FLASH_MLA_BF16_TYPE 0
#endif
#if FLASH_MLA_BF16_TYPE == 0
cutlass
::
NumericArrayConverter
<
To_type
,
From_type
,
numel
,
cutlass
::
FloatRoundStyle
::
round_toward_zero
>
convert_op
;
#else
cutlass
::
NumericArrayConverter
<
To_type
,
From_type
,
numel
,
cutlass
::
FloatRoundStyle
::
round_half_ulp_truncate
>
convert_op
;
#endif
*
result_ptr
=
convert_op
(
*
reinterpret_cast
<
const
cutlass
::
Array
<
From_type
,
numel
>
*>
(
tensor
.
data
()));
}
else
{
cutlass
::
NumericArrayConverter
<
To_type
,
From_type
,
numel
>
convert_op
;
...
...
setup.py
View file @
3a477917
...
...
@@ -19,9 +19,14 @@ def is_flag_set(flag: str) -> bool:
return
os
.
getenv
(
flag
,
"FALSE"
).
lower
()
in
[
"true"
,
"1"
,
"y"
,
"yes"
]
def get_features_args():
    """Build the feature-related ``-D`` compiler flags from environment variables.

    Reads ``FLASH_MLA_BF16_TYPE`` (default ``"0"``), which selects the bf16
    rounding mode compiled into the kernels: ``"0"`` is round_toward_zero and
    ``"1"`` is round_half_ulp_truncate. The chosen mode is printed. If
    ``FLASH_MLA_DISABLE_FP16`` is set (as judged by ``is_flag_set``),
    ``-DFLASH_MLA_DISABLE_FP16`` is added as well.

    Returns:
        list[str]: the compiler define flags; always contains
        ``-DFLASH_MLA_BF16_TYPE=<0|1>``.

    Raises:
        ValueError: if ``FLASH_MLA_BF16_TYPE`` is neither ``"0"`` nor ``"1"``.
    """
    bf16_mode_names = {"0": "round_toward_zero", "1": "round_half_ulp_truncate"}
    bf16_type = os.getenv("FLASH_MLA_BF16_TYPE", "0")
    # Validate explicitly instead of with `assert`: asserts are removed under
    # `python -O`, which would let an arbitrary string reach the compiler flags.
    if bf16_type not in bf16_mode_names:
        raise ValueError(f"bf16_type must be 0 or 1, got {bf16_type!r}")
    print(f"Using BFloat16 rounding mode: {bf16_mode_names[bf16_type]}")

    features_args = []
    if is_flag_set("FLASH_MLA_DISABLE_FP16"):
        features_args.append("-DFLASH_MLA_DISABLE_FP16")
    features_args.append(f"-DFLASH_MLA_BF16_TYPE={bf16_type}")
    return features_args
def
get_arch_flags
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment