"vscode:/vscode.git/clone" did not exist on "5bb38586a953c9978c618bca20236b4873638dce"
Unverified Commit 2c562fd2, authored by Hubert Lu, committed by GitHub
Browse files

Fix Llama 4 with MXFP4 dynamic quant on MI35x (#9993)

parent b648d862
......@@ -816,7 +816,10 @@ class Mxfp4DynamicQuantMoEMethod(FusedMoEMethodBase):
moe_runner_config: MoeRunnerConfig,
) -> torch.Tensor:
topk_weights, topk_ids, _ = topk_output
if _is_hip:
topk_weights = topk_weights.to(
torch.float32
) # aiter's moe_sorting requires topk_weights to be FP32
return fused_moe(
x,
layer.w13_weight,
......
......@@ -2336,7 +2336,8 @@ class ServerArgs:
assert self.attention_backend in {
"fa3",
"aiter",
}, "fa3 or aiter is required for Llama4 model"
"triton",
}, "fa3, aiter, or triton is required for Llama4 model"
elif model_arch in [
"Gemma2ForCausalLM",
"Gemma3ForCausalLM",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment