Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
73105554
Unverified
Commit
73105554
authored Apr 07, 2026 by Jinzhen Lin
Committed by GitHub, Apr 07, 2026
Browse files
[Bugfix] Fix marlin nvfp4 rescaling (#37502)
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
parent
96b5004b
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 6 additions and 4 deletions
+6
-4
vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
...el_executor/layers/quantization/utils/marlin_utils_fp4.py
+6
-4
No files found.
vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
View file @
73105554
...
...
@@ -43,9 +43,9 @@ def _nvfp4_compute_scale_factor(
ws_float
=
marlin_scales
.
float
()
*
(
2
**
7
)
nonzero_mask
=
ws_float
>
0
if
nonzero_mask
.
any
():
m
in
_val
=
ws_float
[
nonzero_mask
].
m
in
()
if
m
in
_val
<
2
:
sf
=
(
2
/
m
in
_val
).
log2
().
ceil
().
exp2
()
m
ax
_val
=
ws_float
[
nonzero_mask
].
m
ax
()
if
m
ax
_val
<
448
*
(
2
**
7
)
:
sf
=
(
448
*
(
2
**
7
)
/
m
ax
_val
).
log2
().
floor
().
exp2
()
return
sf
.
item
()
return
1.0
...
...
@@ -105,7 +105,9 @@ def nvfp4_marlin_process_scales(
if
scale_factor
>
1.0
:
marlin_scales
=
(
marlin_scales
.
float
()
*
scale_factor
).
to
(
torch
.
half
)
marlin_scales
=
(
marlin_scales
*
(
2
**
7
)).
view
(
torch
.
int16
)
<<
1
marlin_scales
=
marlin_scales
*
(
2
**
7
)
marlin_scales
[
marlin_scales
<
2
]
=
0
marlin_scales
=
marlin_scales
.
view
(
torch
.
int16
)
<<
1
marlin_scales
=
marlin_scales
.
view
(
torch
.
float8_e4m3fn
)
marlin_scales
=
marlin_scales
[:,
1
::
2
].
contiguous
()
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.