Commit 0d4ff65d authored by zhuwenwen
Browse files

update lmslim import

parent 3ae8665d
...@@ -25,10 +25,13 @@ from vllm.model_executor.layers.fused_moe.deep_gemm_moe import ( ...@@ -25,10 +25,13 @@ from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
from vllm.model_executor.layers.fused_moe.moe_align_block_size import ( from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
moe_align_block_size) moe_align_block_size)
from lmslim.layers.gemm.int8_utils import ( try:
per_token_group_quant_int8, per_token_quant_int8) from lmslim.layers.gemm.int8_utils import (
from lmslim.layers.fused_moe.fuse_moe_int8 import (fused_experts_impl_int8, get_w8a8moe_json) per_token_group_quant_int8, per_token_quant_int8)
from lmslim.layers.fused_moe.fuse_moe_w4a8 import fused_experts_impl_w4a8 from lmslim.layers.fused_moe.fuse_moe_int8 import (fused_experts_impl_int8, get_w8a8moe_json)
from lmslim.layers.fused_moe.fuse_moe_w4a8 import fused_experts_impl_w4a8
except Exception:
print("INFO: Please install lmslim if you want to infer the quantitative model of moe.\n")
from vllm.model_executor.layers.fused_moe.prepare_finalize import ( from vllm.model_executor.layers.fused_moe.prepare_finalize import (
MoEPrepareAndFinalizeNoEP) MoEPrepareAndFinalizeNoEP)
......
...@@ -8,8 +8,11 @@ import torch ...@@ -8,8 +8,11 @@ import torch
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.model_executor.layers.quantization.utils.fp8_utils import ( from vllm.model_executor.layers.quantization.utils.fp8_utils import (
per_token_group_quant_fp8) per_token_group_quant_fp8)
from lmslim.layers.gemm.int8_utils import ( try:
per_token_group_quant_int8, per_token_quant_int8) from lmslim.layers.gemm.int8_utils import (
per_token_group_quant_int8, per_token_quant_int8)
except Exception:
print("INFO: Please install lmslim if you want to use int utils.\n")
from vllm.utils import cdiv from vllm.utils import cdiv
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment