Commit 87d03653 authored by zhuwenwen's avatar zhuwenwen
Browse files

update lmslim import

parent 92160378
...@@ -25,10 +25,13 @@ from vllm.model_executor.layers.fused_moe.deep_gemm_moe import ( ...@@ -25,10 +25,13 @@ from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
from vllm.model_executor.layers.fused_moe.moe_align_block_size import ( from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
moe_align_block_size) moe_align_block_size)
from lmslim.layers.gemm.int8_utils import ( try:
from lmslim.layers.gemm.int8_utils import (
per_token_group_quant_int8, per_token_quant_int8) per_token_group_quant_int8, per_token_quant_int8)
from lmslim.layers.fused_moe.fuse_moe_int8 import (fused_experts_impl_int8, get_w8a8moe_json) from lmslim.layers.fused_moe.fuse_moe_int8 import (fused_experts_impl_int8, get_w8a8moe_json)
from lmslim.layers.fused_moe.fuse_moe_w4a8 import fused_experts_impl_w4a8 from lmslim.layers.fused_moe.fuse_moe_w4a8 import fused_experts_impl_w4a8
except Exception:
print("INFO: Please install lmslim if you want to infer the quantitative model of moe.\n")
from vllm.model_executor.layers.fused_moe.prepare_finalize import ( from vllm.model_executor.layers.fused_moe.prepare_finalize import (
......
...@@ -8,8 +8,12 @@ import torch ...@@ -8,8 +8,12 @@ import torch
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
from vllm.model_executor.layers.quantization.utils.fp8_utils import ( from vllm.model_executor.layers.quantization.utils.fp8_utils import (
per_token_group_quant_fp8) per_token_group_quant_fp8)
from lmslim.layers.gemm.int8_utils import ( try:
from lmslim.layers.gemm.int8_utils import (
per_token_group_quant_int8, per_token_quant_int8) per_token_group_quant_int8, per_token_quant_int8)
except Exception:
print("INFO: Please install lmslim if you want to use int utils.\n")
from vllm.model_executor.layers.quantization.utils.mxfp4_utils import ( from vllm.model_executor.layers.quantization.utils.mxfp4_utils import (
quant_dequant_mxfp4) quant_dequant_mxfp4)
from vllm.platforms import current_platform from vllm.platforms import current_platform
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment