update lmslim import

0d4ff65d · zhuwenwen · 3ae8665d · 0d4ff65d · 0d4ff65d
Commit 0d4ff65d authored Aug 06, 2025 by zhuwenwen
Showing 2 changed files with 12 additions and 6 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py +7 -4

vllm/model_executor/layers/fused_moe/utils.py vllm/model_executor/layers/fused_moe/utils.py +5 -2

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -25,10 +25,13 @@ from vllm.model_executor.layers.fused_moe.deep_gemm_moe import (
 from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
    moe_align_block_size)
-from lmslim.layers.gemm.int8_utils import (
+try:
-   per_token_group_quant_int8, per_token_quant_int8)
+    from lmslim.layers.gemm.int8_utils import (
-from lmslim.layers.fused_moe.fuse_moe_int8 import (fused_experts_impl_int8, get_w8a8moe_json)
+    per_token_group_quant_int8, per_token_quant_int8)
-from lmslim.layers.fused_moe.fuse_moe_w4a8 import fused_experts_impl_w4a8
+    from lmslim.layers.fused_moe.fuse_moe_int8 import (fused_experts_impl_int8, get_w8a8moe_json)
+    from lmslim.layers.fused_moe.fuse_moe_w4a8 import fused_experts_impl_w4a8
+except Exception:
+    print("INFO: Please install lmslim if you want to infer the quantitative model of moe.\n") 
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
    MoEPrepareAndFinalizeNoEP)

--- a/vllm/model_executor/layers/fused_moe/utils.py
+++ b/vllm/model_executor/layers/fused_moe/utils.py
@@ -8,8 +8,11 @@ import torch
 from vllm import _custom_ops as ops
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
    per_token_group_quant_fp8)
-from lmslim.layers.gemm.int8_utils import (
+try:
-    per_token_group_quant_int8, per_token_quant_int8)
+    from lmslim.layers.gemm.int8_utils import (
+        per_token_group_quant_int8, per_token_quant_int8)
+except Exception:
+    print("INFO: Please install lmslim if you want to use int utils.\n") 
 from vllm.utils import cdiv