Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
04ea3540
Commit
04ea3540
authored
Nov 11, 2025
by
linhai1
Browse files
support fp8_e4m3.
parent
50f7ea0f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
3 deletions
+3
-3
python/sglang/srt/layers/attention/dcu_mla_backend.py
python/sglang/srt/layers/attention/dcu_mla_backend.py
+3
-3
No files found.
python/sglang/srt/layers/attention/dcu_mla_backend.py
View file @
04ea3540
...
...
@@ -465,9 +465,9 @@ class DCUMLABackend(AttentionBackend):
                getattr(torch, "float8_e5m2fnuz", None),
            ):
                if k_cache_reshaped.dtype == torch.float8_e4m3fnuz:
                    k_cache_reshaped = k_cache_reshaped.view(torch.float8_e4m3fn)
                    kv_cache_dtype = "fp8_e4m3"
                elif k_cache_reshaped.dtype == torch.float8_e5m2fnuz:
                    k_cache_reshaped = k_cache_reshaped.view(torch.float8_e5m2)
                    kv_cache_dtype = "fp8_e5m2"
                k_scale = layer.k_scale if layer.k_scale is not None else torch.tensor([1.0], dtype=torch.float32, device=reshape_q.device)
                o = self._call_fp8_decode(
                    reshape_q,
...
...
@@ -476,7 +476,7 @@ class DCUMLABackend(AttentionBackend):
                    (forward_batch.seq_lens + self.num_draft_tokens).to(torch.int32),
                    layer.scaling,
                    k_scale.to(torch.float32),
-                    kv_cache_dtype=self.data_type,
+                    kv_cache_dtype=kv_cache_dtype,
                )
            else:
                o = self._call_decode(
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment