Unverified Commit 29350922 authored by Heng Guo's avatar Heng Guo Committed by GitHub
Browse files

[Feature][Quantization] auto_round format add support for regex (#24024)


Signed-off-by: default avatarn1ck-guo <heng.guo@intel.com>
Signed-off-by: default avatarHeng Guo <heng.guo@intel.com>
Co-authored-by: default avatargemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 8ae16928
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
from fractions import Fraction from fractions import Fraction
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
import regex as re
import torch import torch
from vllm.logger import init_logger from vllm.logger import init_logger
...@@ -128,12 +129,45 @@ class AutoRoundConfig(QuantizationConfig): ...@@ -128,12 +129,45 @@ class AutoRoundConfig(QuantizationConfig):
def get_layer_config(self, layer, layer_name: str): def get_layer_config(self, layer, layer_name: str):
def get_config(name: str, quantized: bool = True): def get_config(name: str, quantized: bool = True):
cfg = self.extra_config.get(name, {}) if self.extra_config else {} if not self.extra_config:
return (
self.weight_bits if quantized else 16,
self.group_size if quantized else -1,
self.sym if quantized else True,
)
# exact match first
if name in self.extra_config:
cfg = self.extra_config[name]
return (
cfg.get("bits", self.weight_bits if quantized else 16),
cfg.get("group_size", self.group_size if quantized else -1),
cfg.get("sym", self.sym if quantized else True),
)
REGEX_SPECIAL_CHARS = set(r"*+?^$()[]{}|\\")
for pattern, cfg in self.extra_config.items():
if not isinstance(pattern, str) or not any(
c in REGEX_SPECIAL_CHARS for c in pattern
):
continue
try:
if re.search(re.compile(pattern), name) is not None:
return ( return (
cfg.get("bits", self.weight_bits if quantized else 16), cfg.get("bits", self.weight_bits if quantized else 16),
cfg.get("group_size", self.group_size if quantized else -1), cfg.get("group_size", self.group_size if quantized else -1),
cfg.get("sym", self.sym if quantized else True), cfg.get("sym", self.sym if quantized else True),
) )
except re.error:
# Invalid regex, ignore.
continue
return (
self.weight_bits if quantized else 16,
self.group_size if quantized else -1,
self.sym if quantized else True,
)
# 1. Exact match from config # 1. Exact match from config
if self.extra_config and layer_name in self.extra_config: if self.extra_config and layer_name in self.extra_config:
...@@ -176,7 +210,7 @@ class AutoRoundConfig(QuantizationConfig): ...@@ -176,7 +210,7 @@ class AutoRoundConfig(QuantizationConfig):
f"consistent quant config for {sub_names}" f"consistent quant config for {sub_names}"
) )
# 5. Fallback # 5. Fallback or try a regular expression match
return get_config(layer_name, quantized) return get_config(layer_name, quantized)
def check_quantized(self, weight_bits: int) -> bool: def check_quantized(self, weight_bits: int) -> bool:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment