Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c47b6c85
Unverified
Commit
c47b6c85
authored
Nov 13, 2025
by
zofia
Committed by
GitHub
Nov 13, 2025
Browse files
[XPU] add sym params to IPEXConfig (#28611)
Signed-off-by:
Zhu, Zufang
<
zufang.zhu@intel.com
>
parent
c428e8d8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
2 deletions
+18
-2
vllm/model_executor/layers/quantization/ipex_quant.py
vllm/model_executor/layers/quantization/ipex_quant.py
+18
-2
No files found.
vllm/model_executor/layers/quantization/ipex_quant.py
View file @
c47b6c85
...
@@ -52,6 +52,7 @@ class IPEXConfig(QuantizationConfig):
...
@@ -52,6 +52,7 @@ class IPEXConfig(QuantizationConfig):
modules_to_not_convert
:
list
[
str
]
|
None
=
None
,
modules_to_not_convert
:
list
[
str
]
|
None
=
None
,
desc_act
:
bool
|
None
=
None
,
desc_act
:
bool
|
None
=
None
,
lm_head_quantized
:
bool
|
None
=
None
,
lm_head_quantized
:
bool
|
None
=
None
,
is_sym
:
bool
|
None
=
None
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
self
.
method
=
method
self
.
method
=
method
...
@@ -60,6 +61,7 @@ class IPEXConfig(QuantizationConfig):
...
@@ -60,6 +61,7 @@ class IPEXConfig(QuantizationConfig):
self
.
modules_to_not_convert
=
modules_to_not_convert
or
[]
self
.
modules_to_not_convert
=
modules_to_not_convert
or
[]
self
.
desc_act
=
desc_act
self
.
desc_act
=
desc_act
self
.
lm_head_quantized
=
lm_head_quantized
self
.
lm_head_quantized
=
lm_head_quantized
self
.
is_sym
=
is_sym
self
.
pack_factor
=
32
//
self
.
weight_bits
self
.
pack_factor
=
32
//
self
.
weight_bits
if
self
.
weight_bits
not
in
[
4
]:
if
self
.
weight_bits
not
in
[
4
]:
...
@@ -108,15 +110,25 @@ class IPEXConfig(QuantizationConfig):
...
@@ -108,15 +110,25 @@ class IPEXConfig(QuantizationConfig):
modules_to_not_convert
=
cls
.
get_from_keys_or
(
modules_to_not_convert
=
cls
.
get_from_keys_or
(
config
,
[
"modules_to_not_convert"
],
None
config
,
[
"modules_to_not_convert"
],
None
)
)
is_sym
=
not
cls
.
get_from_keys_or
(
config
,
[
"zero_point"
],
default
=
False
)
return
cls
(
return
cls
(
method
,
weight_bits
,
group_size
,
modules_to_not_convert
,
False
,
False
method
,
weight_bits
,
group_size
,
modules_to_not_convert
,
False
,
False
,
is_sym
,
)
)
# otherwise for gptq
# otherwise for gptq
weight_bits
=
cls
.
get_from_keys
(
config
,
[
"bits"
])
weight_bits
=
cls
.
get_from_keys
(
config
,
[
"bits"
])
group_size
=
cls
.
get_from_keys
(
config
,
[
"group_size"
])
group_size
=
cls
.
get_from_keys
(
config
,
[
"group_size"
])
lm_head_quantized
=
cls
.
get_from_keys_or
(
config
,
[
"lm_head"
],
default
=
False
)
lm_head_quantized
=
cls
.
get_from_keys_or
(
config
,
[
"lm_head"
],
default
=
False
)
desc_act
=
cls
.
get_from_keys_or
(
config
,
[
"desc_act"
],
default
=
False
)
desc_act
=
cls
.
get_from_keys_or
(
config
,
[
"desc_act"
],
default
=
False
)
return
cls
(
method
,
weight_bits
,
group_size
,
[],
desc_act
,
lm_head_quantized
)
is_sym
=
cls
.
get_from_keys_or
(
config
,
[
"sym"
],
default
=
True
)
return
cls
(
method
,
weight_bits
,
group_size
,
[],
desc_act
,
lm_head_quantized
,
is_sym
)
@
classmethod
@
classmethod
def
override_quantization_method
(
def
override_quantization_method
(
...
@@ -180,6 +192,7 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod):
...
@@ -180,6 +192,7 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod):
# The float activation will be quantized (dynamic, per-token) to INT8.
# The float activation will be quantized (dynamic, per-token) to INT8.
act_quant_mode
=
ipex
.
quantization
.
WoqActQuantMode
.
PER_BATCH_IC_BLOCK
act_quant_mode
=
ipex
.
quantization
.
WoqActQuantMode
.
PER_BATCH_IC_BLOCK
assert
isinstance
(
self
.
quant_config
,
IPEXConfig
)
qconfig
=
ipex
.
quantization
.
get_weight_only_quant_qconfig_mapping
(
qconfig
=
ipex
.
quantization
.
get_weight_only_quant_qconfig_mapping
(
weight_dtype
=
weight_dtype
,
weight_dtype
=
weight_dtype
,
lowp_mode
=
lowp_mode
,
lowp_mode
=
lowp_mode
,
...
@@ -200,6 +213,7 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod):
...
@@ -200,6 +213,7 @@ class IPEXGPTQLinearMethod(GPTQLinearMethod):
bias
=
bias
,
bias
=
bias
,
group_size
=
self
.
quant_config
.
group_size
,
group_size
=
self
.
quant_config
.
group_size
,
quant_method
=
IPEXConfig
.
IPEX_QUANT_METHOD_MAP
[
"gptq"
],
quant_method
=
IPEXConfig
.
IPEX_QUANT_METHOD_MAP
[
"gptq"
],
weight_qscheme
=
"sym"
if
self
.
quant_config
.
is_sym
else
"asym"
,
)
)
)
)
...
@@ -250,6 +264,7 @@ class IPEXAWQLinearMethod(AWQLinearMethod):
...
@@ -250,6 +264,7 @@ class IPEXAWQLinearMethod(AWQLinearMethod):
# The float activation will be quantized (dynamic, per-token) to INT8.
# The float activation will be quantized (dynamic, per-token) to INT8.
act_quant_mode
=
ipex
.
quantization
.
WoqActQuantMode
.
PER_BATCH
act_quant_mode
=
ipex
.
quantization
.
WoqActQuantMode
.
PER_BATCH
assert
isinstance
(
self
.
quant_config
,
IPEXConfig
)
qconfig
=
ipex
.
quantization
.
get_weight_only_quant_qconfig_mapping
(
qconfig
=
ipex
.
quantization
.
get_weight_only_quant_qconfig_mapping
(
weight_dtype
=
weight_dtype
,
weight_dtype
=
weight_dtype
,
lowp_mode
=
lowp_mode
,
lowp_mode
=
lowp_mode
,
...
@@ -269,6 +284,7 @@ class IPEXAWQLinearMethod(AWQLinearMethod):
...
@@ -269,6 +284,7 @@ class IPEXAWQLinearMethod(AWQLinearMethod):
bias
=
bias
,
bias
=
bias
,
group_size
=
self
.
quant_config
.
group_size
,
group_size
=
self
.
quant_config
.
group_size
,
quant_method
=
IPEXConfig
.
IPEX_QUANT_METHOD_MAP
[
"awq"
],
# type: ignore
quant_method
=
IPEXConfig
.
IPEX_QUANT_METHOD_MAP
[
"awq"
],
# type: ignore
weight_qscheme
=
"sym"
if
self
.
quant_config
.
is_sym
else
"asym"
,
)
)
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment