Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0b254989
Unverified
Commit 0b254989 authored Nov 14, 2025 by haoyangli-amd
Committed by GitHub Nov 14, 2025
Browse files
[Misc] add ignore mapper for quark quantization (#28275)
Signed-off-by: Haoyang Li <lihaoyang0109@gmail.com>
parent
0aecd913
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 9 additions and 3 deletions
+9
-3
vllm/model_executor/layers/quantization/quark/quark.py
vllm/model_executor/layers/quantization/quark/quark.py
+9
-3
No files found.
vllm/model_executor/layers/quantization/quark/quark.py
View file @
0b254989
...
...
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
fnmatch
from
typing
import
Any
,
Optional
,
cast
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
,
cast
import
torch
...
...
@@ -34,6 +34,9 @@ from vllm.model_executor.layers.quantization.quark.utils import (
)
from
vllm.platforms
import
current_platform
if
TYPE_CHECKING
:
from
vllm.model_executor.models.utils
import
WeightsMapper
__all__
=
[
"QuarkLinearMethod"
]
logger
=
init_logger
(
__name__
)
...
...
@@ -54,6 +57,7 @@ class QuarkConfig(QuantizationConfig):
self
.
kv_cache_group
=
kv_cache_group
self
.
kv_cache_config
=
kv_cache_config
self
.
pack_method
=
pack_method
self
.
ignore
:
list
[
str
]
=
cast
(
list
[
str
],
self
.
quant_config
.
get
(
"exclude"
,
[]))
def
get_linear_method
(
self
)
->
"QuarkLinearMethod"
:
return
QuarkLinearMethod
(
self
)
...
...
@@ -74,9 +78,8 @@ class QuarkConfig(QuantizationConfig):
from
vllm.attention.layer
import
Attention
# Avoid circular import
# Check if the layer is skipped for quantization.
exclude_layers
=
cast
(
list
[
str
],
self
.
quant_config
.
get
(
"exclude"
))
if
should_ignore_layer
(
prefix
,
ignore
=
exclude_layers
,
fused_mapping
=
self
.
packed_modules_mapping
prefix
,
ignore
=
self
.
ignore
,
fused_mapping
=
self
.
packed_modules_mapping
):
return
UnquantizedLinearMethod
()
if
isinstance
(
layer
,
LinearBase
):
...
...
@@ -90,6 +93,9 @@ class QuarkConfig(QuantizationConfig):
return
QuarkMoEMethod
.
get_moe_method
(
self
,
module
=
layer
,
layer_name
=
prefix
)
return
None
def
apply_vllm_mapper
(
self
,
hf_to_vllm_mapper
:
"WeightsMapper"
):
self
.
ignore
=
hf_to_vllm_mapper
.
apply_list
(
self
.
ignore
)
@
classmethod
def
from_config
(
cls
,
config
:
dict
[
str
,
Any
])
->
"QuarkConfig"
:
export_config
=
config
.
get
(
"export"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment