Unverified commit 74e7e457, authored by Stefan He, committed by GitHub
Browse files

Fix DEEPEP BF16 compatibility for Deepseek Style model like GLM 4.5 (#8469)


Co-authored-by: Minglei Zhu <mingleizhu1122@gmail.com>
parent 1466c1b8
...@@ -800,11 +800,6 @@ class DeepEPMoE(EPMoE): ...@@ -800,11 +800,6 @@ class DeepEPMoE(EPMoE):
routed_scaling_factor=routed_scaling_factor, routed_scaling_factor=routed_scaling_factor,
) )
self.deepep_mode = deepep_mode self.deepep_mode = deepep_mode
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
assert self.use_fp8_w8a8, (
"DeepGEMM requires an fp8_w8a8 model; "
"alternatively, you can disable DeepGEMM by turning off the ENABLE_JIT_DEEPGEMM environment variable."
)
# TODO: move to the beginning of the file # TODO: move to the beginning of the file
from sglang.srt.distributed.parallel_state import get_tp_group from sglang.srt.distributed.parallel_state import get_tp_group
...@@ -897,7 +892,7 @@ class DeepEPMoE(EPMoE): ...@@ -897,7 +892,7 @@ class DeepEPMoE(EPMoE):
# in forward_aiter, we skip token permutation and unpermutation, which have been fused inside aiter kernel # in forward_aiter, we skip token permutation and unpermutation, which have been fused inside aiter kernel
return self.forward_aiter(dispatch_output) return self.forward_aiter(dispatch_output)
if dispatch_output.format.is_deepep_normal(): if dispatch_output.format.is_deepep_normal():
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM: if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8:
return self.forward_deepgemm_contiguous(dispatch_output) return self.forward_deepgemm_contiguous(dispatch_output)
else: else:
return self.forward_normal(dispatch_output) return self.forward_normal(dispatch_output)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment