Commit 2d8b3257 authored by zhuwenwen's avatar zhuwenwen
Browse files

修复了 AWQ 的 shape 计算 bug,并兼容了通过 lmslim 注册导入的情况

parent d76fc11e
...@@ -34,6 +34,9 @@ if TYPE_CHECKING: ...@@ -34,6 +34,9 @@ if TYPE_CHECKING:
from vllm.model_executor.layers.quantization.awq_triton import awq_gemm_triton from vllm.model_executor.layers.quantization.awq_triton import awq_gemm_triton
triton_configs_dict={} triton_configs_dict={}
def is_layer_skipped_awq(
    prefix: str, modules_to_not_convert: list[str]
) -> bool:
    """Return whether the layer named by ``prefix`` is excluded from AWQ.

    A layer is skipped when any entry of ``modules_to_not_convert`` occurs
    as a substring of ``prefix``.

    Args:
        prefix: Name/prefix string of the layer being checked.
        modules_to_not_convert: Module-name fragments to match against
            ``prefix``. ``None`` or an empty list means nothing is skipped.

    Returns:
        ``True`` if at least one fragment is contained in ``prefix``,
        otherwise ``False``.
    """
    # Tolerate a missing list so an absent setting does not raise TypeError
    # when iterated.
    if not modules_to_not_convert:
        return False
    return any(module_name in prefix for module_name in modules_to_not_convert)
def get_triton_cache(file_path): def get_triton_cache(file_path):
#会将所报错的json文件以字典的形式return出来 #会将所报错的json文件以字典的形式return出来
...@@ -377,7 +380,7 @@ class AWQLinearMethod(LinearMethodBase): ...@@ -377,7 +380,7 @@ class AWQLinearMethod(LinearMethodBase):
qzeros = layer.qzeros qzeros = layer.qzeros
scales = layer.scales scales = layer.scales
pack_factor = self.quant_config.pack_factor pack_factor = self.quant_config.pack_factor
out_shape = x.shape[:-1] + (qweight.shape[-1] * pack_factor,) out_shape = (x.shape[:-1] + (qweight.shape[0] * 1, ))
reshaped_x = x.reshape(-1, x.shape[-1]) reshaped_x = x.reshape(-1, x.shape[-1])
m = reshaped_x.shape[0] m = reshaped_x.shape[0]
......
...@@ -588,7 +588,7 @@ class RocmPlatform(Platform): ...@@ -588,7 +588,7 @@ class RocmPlatform(Platform):
" is not set, enabling VLLM_USE_TRITON_AWQ." " is not set, enabling VLLM_USE_TRITON_AWQ."
) )
envs.VLLM_USE_TRITON_AWQ = False envs.VLLM_USE_TRITON_AWQ = False
os.environ["VLLM_USE_TRITON_AWQ"] = "1" # os.environ["VLLM_USE_TRITON_AWQ"] = "1"
@classmethod @classmethod
def get_punica_wrapper(cls) -> str: def get_punica_wrapper(cls) -> str:
......
...@@ -7,6 +7,7 @@ import uuid ...@@ -7,6 +7,7 @@ import uuid
import torch import torch
from vllm.utils.torch_utils import direct_register_custom_op
MASK_64_BITS = (1 << 64) - 1 MASK_64_BITS = (1 << 64) - 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment