Commit 158efb14 authored by maxiao1's avatar maxiao1
Browse files

修复 w8a8_marlin tp pp (Fix w8a8_marlin under tensor parallelism / pipeline parallelism)

parent eed591c9
@@ -15,6 +15,7 @@
 from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase
 from sglang.srt.utils import set_weight_attrs
 from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.utils import get_moe_a2a_backend
 try:
     from lmslim.layers.fused_moe.fuse_moe_int8_marlin import fused_experts_impl_int8_marlin
 except Exception:
@@ -77,7 +78,7 @@ class CompressedTensorsW8A8Int8MarlinMoEMethod(CompressedTensorsMarlinMoEMethod)
             "weights")
         self.input_quant = self.quant_config.target_scheme_map["Linear"].get(
             "input_activations")
-        self.use_deepep = True
+        self.use_deepep = get_moe_a2a_backend().is_deepep()
         per_channel = (
             self.weight_quant.strategy == QuantizationStrategy.CHANNEL
             and self.input_quant.strategy == QuantizationStrategy.TOKEN)
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment