Unverified commit 5c08a36c, authored by hzh0425 and committed by GitHub
Browse files

[Fix] ensure DeepGEMM is only enabled for FP8_W8A8 models (#8110)

parent 9069884b
...@@ -1272,6 +1272,12 @@ class DeepEPMoE(EPMoE): ...@@ -1272,6 +1272,12 @@ class DeepEPMoE(EPMoE):
routed_scaling_factor=routed_scaling_factor, routed_scaling_factor=routed_scaling_factor,
) )
self.deepep_mode = deepep_mode self.deepep_mode = deepep_mode
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM:
assert self.use_fp8_w8a8, (
"DeepGEMM requires an fp8_w8a8 model; "
"alternatively, you can disable DeepGEMM by turning off the ENABLE_JIT_DEEPGEMM environment variable."
)
if self.deepep_mode.enable_low_latency(): if self.deepep_mode.enable_low_latency():
assert ( assert (
deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment