Unverified commit 0310029a, authored by Alexander Matveev, committed by GitHub
Browse files

[Bugfix] Fix awq_marlin and gptq_marlin flags (#6745)

parent 309aaef8
...@@ -25,7 +25,7 @@ class AWQMarlinConfig(QuantizationConfig): ...@@ -25,7 +25,7 @@ class AWQMarlinConfig(QuantizationConfig):
def __init__(self, weight_bits: int, group_size: int, has_zp: bool, def __init__(self, weight_bits: int, group_size: int, has_zp: bool,
lm_head_quantized: bool) -> None: lm_head_quantized: bool) -> None:
self.weight_bits = weight_bits self.weight_bits = weight_bits
self.pack_factor = 32 // self.weight_bits # packed into int32 self.pack_factor = 32 // self.weight_bits # packed into 32bits
self.group_size = group_size self.group_size = group_size
self.has_zp = has_zp self.has_zp = has_zp
self.lm_head_quantized = lm_head_quantized self.lm_head_quantized = lm_head_quantized
...@@ -69,7 +69,8 @@ class AWQMarlinConfig(QuantizationConfig): ...@@ -69,7 +69,8 @@ class AWQMarlinConfig(QuantizationConfig):
def override_quantization_method(cls, hf_quant_cfg, def override_quantization_method(cls, hf_quant_cfg,
user_quant) -> Optional[str]: user_quant) -> Optional[str]:
can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg) can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg)
is_valid_user_quant = (user_quant is None or user_quant == "marlin") is_valid_user_quant = (user_quant is None or user_quant == "marlin"
or user_quant == "awq_marlin")
if can_convert and is_valid_user_quant: if can_convert and is_valid_user_quant:
msg = ("The model is convertible to {} during runtime." msg = ("The model is convertible to {} during runtime."
......
...@@ -79,7 +79,8 @@ class GPTQMarlinConfig(QuantizationConfig): ...@@ -79,7 +79,8 @@ class GPTQMarlinConfig(QuantizationConfig):
user_quant) -> Optional[str]: user_quant) -> Optional[str]:
can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg) can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg)
is_valid_user_quant = (user_quant is None or user_quant == "marlin") is_valid_user_quant = (user_quant is None or user_quant == "marlin"
or user_quant == "gptq_marlin")
if can_convert and is_valid_user_quant: if can_convert and is_valid_user_quant:
msg = ("The model is convertible to {} during runtime." msg = ("The model is convertible to {} during runtime."
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment