Unverified commit acc1a6e1, authored by Jun-Howie, committed by GitHub
Browse files

Fix the bug related to loading GPTQ INT3 weights. (#23328)


Signed-off-by: JunHowie <JunHowie@aliyun.com>
Co-authored-by: JunHowie <JunHowie@aliyun.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent 8c742a66
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from copy import deepcopy from copy import deepcopy
from fractions import Fraction
from typing import Optional, Union from typing import Optional, Union
import regex as re import regex as re
...@@ -29,7 +30,7 @@ def override_config(config: QuantizationConfig, prefix: str): ...@@ -29,7 +30,7 @@ def override_config(config: QuantizationConfig, prefix: str):
if isinstance(desc_act, bool): if isinstance(desc_act, bool):
config.desc_act = desc_act config.desc_act = desc_act
config.pack_factor = 32 // config.weight_bits # packed into int32 config.pack_factor = Fraction(32, config.weight_bits) # packed into int32
if config.get_name() == "gptq_marlin": if config.get_name() == "gptq_marlin":
is_sym = get_dynamic_override(config, prefix, "sym", config.is_sym) is_sym = get_dynamic_override(config, prefix, "sym", config.is_sym)
if isinstance(is_sym, bool): if isinstance(is_sym, bool):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment