Unverified commit 42889acb, authored by Baizhou Zhang and committed by GitHub
Browse files

[hotfix] Fix deepep w4a8 bug (#12642)

parent 211f4070
......@@ -357,9 +357,9 @@ class _DeepEPDispatcherImplNormal(_DeepEPDispatcherImplBase):
):
topk_weights, topk_ids = topk_output.topk_weights, topk_output.topk_ids
topk_ids = topk_ids.to(torch.int64)
-if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and not (
-get_moe_runner_backend().is_cutlass()
-and self.quant_config.get_name() == "w4afp8"
+if (
+deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
+and not get_moe_runner_backend().is_cutlass()
):
# TODO hard code 128 block quant,use fp8 communication
hidden_states = sglang_per_token_group_quant_fp8(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment