Unverified commit 42889acb, authored by Baizhou Zhang and committed by GitHub
Browse files

[hotfix] Fix deepep w4a8 bug (#12642)

parent 211f4070
...@@ -357,9 +357,9 @@ class _DeepEPDispatcherImplNormal(_DeepEPDispatcherImplBase): ...@@ -357,9 +357,9 @@ class _DeepEPDispatcherImplNormal(_DeepEPDispatcherImplBase):
): ):
topk_weights, topk_ids = topk_output.topk_weights, topk_output.topk_ids topk_weights, topk_ids = topk_output.topk_weights, topk_output.topk_ids
topk_ids = topk_ids.to(torch.int64) topk_ids = topk_ids.to(torch.int64)
if deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM and not ( if (
get_moe_runner_backend().is_cutlass() deep_gemm_wrapper.ENABLE_JIT_DEEPGEMM
and self.quant_config.get_name() == "w4afp8" and not get_moe_runner_backend().is_cutlass()
): ):
# TODO hard code 128 block quant,use fp8 communication # TODO hard code 128 block quant,use fp8 communication
hidden_states = sglang_per_token_group_quant_fp8( hidden_states = sglang_per_token_group_quant_fp8(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment