Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ec58c10c
Unverified
Commit
ec58c10c
authored
Dec 21, 2025
by
Kevin McKay
Committed by
GitHub
Dec 21, 2025
Browse files
[Misc] Fix quantization-related typos (#31116)
Signed-off-by: c0de128 <kevin.mckay@outlook.com>
parent
8c084de5
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
7 deletions
+7
-7
tests/kernels/moe/modular_kernel_tools/common.py
tests/kernels/moe/modular_kernel_tools/common.py
+5
-5
tests/quantization/test_fp8.py
tests/quantization/test_fp8.py
+1
-1
vllm/utils/deep_gemm.py
vllm/utils/deep_gemm.py
+1
-1
No files found.
tests/kernels/moe/modular_kernel_tools/common.py
View file @
ec58c10c
...
...
@@ -258,16 +258,16 @@ class Config:
f
"
{
self
.
fe_supported_types
()
}
."
)
# Check block quanization support
is_block_quatized
=
self
.
quant_block_shape
is
not
None
if
is_block_quatized
and
self
.
quant_dtype
is
None
:
# Check block quantization support
is_block_quantized
=
self
.
quant_block_shape
is
not
None
if
is_block_quantized
and
self
.
quant_dtype
is
None
:
return
False
,
"No block quantization support."
if
is_block_quatized
and
not
self
.
is_block_quant_supported
():
if
is_block_quantized
and
not
self
.
is_block_quant_supported
():
return
False
,
"Mismatched block quantization support."
# deep_gemm only works with block-quantized
if
self
.
needs_deep_gemm
()
and
not
is_block_quatized
:
if
self
.
needs_deep_gemm
()
and
not
is_block_quantized
:
return
False
,
"Needs DeepGEMM but not block quantized."
# Check dependencies (turn into asserts?)
...
...
tests/quantization/test_fp8.py
View file @
ec58c10c
...
...
@@ -217,7 +217,7 @@ def test_scaled_fp8_quant(dtype) -> None:
ref_y
,
inv_scale
=
ops
.
scaled_fp8_quant
(
x
,
None
)
ref_y
=
per_tensor_dequantize
(
ref_y
,
inv_scale
,
dtype
)
# Reference dynamic quantizaton
# Reference dynamic quantization
y
=
quantize_ref
(
x
,
inv_scale
)
torch
.
testing
.
assert_close
(
ref_y
,
per_tensor_dequantize
(
y
,
inv_scale
,
dtype
))
...
...
vllm/utils/deep_gemm.py
View file @
ec58c10c
...
...
@@ -389,7 +389,7 @@ def should_use_deepgemm_for_fp8_linear(
# Verify DeepGEMM N/K dims requirements
# NOTE: Also synchronized with test_w8a8_block_fp8_deep_gemm_matmul
# test inside kernels/quatization/test_block_fp8.py
# test inside kernels/quantization/test_block_fp8.py
N_MULTIPLE
=
64
K_MULTIPLE
=
128
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment