Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4896d0c2
Unverified
Commit
4896d0c2
authored
Feb 04, 2025
by
Kyle Sayers
Committed by
GitHub
Feb 03, 2025
Browse files
[Quant] Fix use_mla TypeError and support loading pure-sparsity Compressed Tensors configs (#12711)
parent
bb392af4
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
2 deletions
+8
-2
vllm/config.py
vllm/config.py
+3
-2
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
...ers/quantization/compressed_tensors/compressed_tensors.py
+5
-0
No files found.
vllm/config.py
View file @
4896d0c2
...
...
@@ -1000,8 +1000,9 @@ class ModelConfig:
# have fp8 for both weights and activations.
if
self
.
quantization
==
"compressed-tensors"
:
quant_config
=
self
.
_parse_quant_hf_config
()
for
group_name
,
cfg
in
quant_config
.
get
(
"config_groups"
,
(
""
,
{})).
items
():
for
group_name
,
cfg
in
quant_config
.
get
(
"config_groups"
,
{
""
:
{}
}).
items
():
act_cfg
=
cfg
.
get
(
"input_activations"
,
{})
act_type
=
None
if
act_cfg
is
None
else
act_cfg
.
get
(
"type"
,
""
)
w_cfg
=
cfg
.
get
(
"weights"
,
{})
...
...
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
View file @
4896d0c2
...
...
@@ -424,6 +424,11 @@ class CompressedTensorsConfig(QuantizationConfig):
or
input_quant
is
not
None
,
weight_quant
=
weight_quant
,
input_quant
=
input_quant
)
elif
weight_quant
is
None
:
logger
.
warning_once
(
"Acceleration for non-quantized schemes is "
"not supported by Compressed Tensors. "
"Falling back to UnquantizedLinearMethod"
)
return
None
else
:
# Find the quant_scheme
scheme
=
self
.
_get_scheme_from_parts
(
# type: ignore
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment