"examples/vscode:/vscode.git/clone" did not exist on "a848aa3e9b14184195151f0b986ddddbf65e8cdc"
Unverified commit 74e7e457, authored by Stefan He, committed by GitHub
Browse files

Fix DEEPEP BF16 compatibility for Deepseek Style model like GLM 4.5 (#8469)


Co-authored-by: Minglei Zhu <mingleizhu1122@gmail.com>
parent 1466c1b8
...@@ -800,11 +800,6 @@ class DeepEPMoE(EPMoE): ...@@ -800,11 +800,6 @@ class DeepEPMoE(EPMoE):
routed_scaling_factor=routed_scaling_factor, routed_scaling_factor=routed_scaling_factor,
) )
self.deepep_mode = deepep_mode self.deepep_mode = deepep_mode
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
assert self.use_fp8_w8a8, (
"DeepGEMM requires an fp8_w8a8 model; "
"alternatively, you can disable DeepGEMM by turning off the ENABLE_JIT_DEEPGEMM environment variable."
)
# TODO: move to the beginning of the file # TODO: move to the beginning of the file
from sglang.srt.distributed.parallel_state import get_tp_group from sglang.srt.distributed.parallel_state import get_tp_group
...@@ -897,7 +892,7 @@ class DeepEPMoE(EPMoE): ...@@ -897,7 +892,7 @@ class DeepEPMoE(EPMoE):
# in forward_aiter, we skip token permutation and unpermutation, which have been fused inside aiter kernel # in forward_aiter, we skip token permutation and unpermutation, which have been fused inside aiter kernel
return self.forward_aiter(dispatch_output) return self.forward_aiter(dispatch_output)
if dispatch_output.format.is_deepep_normal(): if dispatch_output.format.is_deepep_normal():
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM: if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and self.use_fp8_w8a8:
return self.forward_deepgemm_contiguous(dispatch_output) return self.forward_deepgemm_contiguous(dispatch_output)
else: else:
return self.forward_normal(dispatch_output) return self.forward_normal(dispatch_output)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment