Commit ca4598a4 authored by zhuwenwen
Browse files

Merge branch 'v0.11.0-dev-yql-12.24' into 'v0.11.0-dev'

在v0.11中暂不支持w4a16的moe算子scale和zero合并操作

See merge request dcutoolkit/deeplearing/vllm!318
parents 9ff617d7 0ba1219a
...@@ -195,6 +195,8 @@ class MoeWNA16Method(FusedMoEMethodBase): ...@@ -195,6 +195,8 @@ class MoeWNA16Method(FusedMoEMethodBase):
moe: "FusedMoEConfig") -> None: moe: "FusedMoEConfig") -> None:
super().__init__(moe) super().__init__(moe)
self.quant_config = quant_config self.quant_config = quant_config
# if os.getenv('AWQ_MOE_SZ') == None:
# os.environ['AWQ_MOE_SZ'] = '1'
self.use_w4a16_moe_sz = os.environ.get('AWQ_MOE_SZ') == '1' self.use_w4a16_moe_sz = os.environ.get('AWQ_MOE_SZ') == '1'
self.use_w4a16_cuda = 0 self.use_w4a16_cuda = 0
self.use_moe_lmslim = 0 self.use_moe_lmslim = 0
......
...@@ -244,8 +244,8 @@ def _get_model_architecture( ...@@ -244,8 +244,8 @@ def _get_model_architecture(
# awq相关配置 # awq相关配置
try: try:
if os.getenv('AWQ_MOE_SZ') == None: # if os.getenv('AWQ_MOE_SZ') == None:
os.environ['AWQ_MOE_SZ'] = '1' # os.environ['AWQ_MOE_SZ'] = '1'
if os.getenv('AWQ_PAD') == None and (torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count == 120): if os.getenv('AWQ_PAD') == None and (torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count == 120):
os.environ['AWQ_PAD'] = '1' os.environ['AWQ_PAD'] = '1'
except Exception as e: except Exception as e:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment