"...git@developer.sourcefind.cn:OpenDAS/torch-harmonics.git" did not exist on "9c26a6d8163b2cda1a16e85267b30b6e82c9a41b"
Commit 158efb14 authored by maxiao1
Browse files

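修复w8a8_marlin tp pp (Fix w8a8_marlin under TP/PP: `use_deepep` is no longer hard-coded to True; it now follows whether the MoE all-to-all backend is DeepEP)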

parent eed591c9
......@@ -15,6 +15,7 @@ from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase
from sglang.srt.utils import set_weight_attrs
from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
+from sglang.srt.layers.moe.utils import get_moe_a2a_backend
try:
from lmslim.layers.fused_moe.fuse_moe_int8_marlin import fused_experts_impl_int8_marlin
except Exception:
......@@ -77,7 +78,7 @@ class CompressedTensorsW8A8Int8MarlinMoEMethod(CompressedTensorsMarlinMoEMethod)
"weights")
self.input_quant = self.quant_config.target_scheme_map["Linear"].get(
"input_activations")
-self.use_deepep = True
+self.use_deepep = get_moe_a2a_backend().is_deepep()
per_channel = (
self.weight_quant.strategy == QuantizationStrategy.CHANNEL
and self.input_quant.strategy == QuantizationStrategy.TOKEN)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment