Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
67532a1a
Unverified
Commit
67532a1a
authored
Sep 16, 2025
by
Michael Goin
Committed by
GitHub
Sep 16, 2025
Browse files
[UX] Remove "quantization is not fully optimized yet" log (#25012)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
5672ba90
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
21 deletions
+0
-21
vllm/config/__init__.py
vllm/config/__init__.py
+0
-21
No files found.
vllm/config/__init__.py
View file @
67532a1a
...
...
@@ -1086,22 +1086,6 @@ class ModelConfig:
def
_verify_quantization
(
self
)
->
None
:
supported_quantization
=
me_quant
.
QUANTIZATION_METHODS
optimized_quantization_methods
=
[
"fp8"
,
"modelopt"
,
"gptq_marlin_24"
,
"gptq_marlin"
,
"awq_marlin"
,
"fbgemm_fp8"
,
"compressed-tensors"
,
"experts_int8"
,
"quark"
,
"modelopt_fp4"
,
"bitblas"
,
"gptq_bitblas"
,
"inc"
,
"petit_nvfp4"
,
]
if
self
.
quantization
is
not
None
:
self
.
quantization
=
cast
(
me_quant
.
QuantizationMethods
,
self
.
quantization
)
...
...
@@ -1183,11 +1167,6 @@ class ModelConfig:
f
"be one of
{
supported_quantization
}
."
)
from
vllm.platforms
import
current_platform
current_platform
.
verify_quantization
(
self
.
quantization
)
if
self
.
quantization
not
in
optimized_quantization_methods
:
logger
.
warning
(
"%s quantization is not fully "
"optimized yet. The speed can be slower than "
"non-quantized models."
,
self
.
quantization
)
def
_verify_cuda_graph
(
self
)
->
None
:
# The `max_seq_len_to_capture` was incorrectly
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment