Commit d04137d6 authored by 王敏
Browse files

[BUGFIX]修复w4a8低延迟报错

parent d5538a81
......@@ -599,6 +599,7 @@ def int8_w4a8_moe_quant_config(
per_act_token_quant=per_act_token_quant,
per_out_ch_quant=False,
block_shape=block_shape,
weight_dtype="int4"
)
......
......@@ -3,6 +3,7 @@ import os
import torch
from torch.nn.parameter import Parameter
from vllm.logger import init_logger
import vllm.envs as envs
from vllm import _custom_ops as ops
from vllm.config import get_current_vllm_config
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment