Unverified commit 198b9056, authored by Hubert Lu, committed by GitHub
Browse files

[AMD] Fix Llama 4 Scout and Maverick accuracy issues on MI300X (#6274)

parent 73eb67c0
@@ -186,6 +186,19 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
if _is_hip and get_bool_env_var("SGLANG_AITER_MOE"): if _is_hip and get_bool_env_var("SGLANG_AITER_MOE"):
assert not no_combine, "unsupported" assert not no_combine, "unsupported"
if apply_router_weight_on_input:
assert (
topk_weights.dim() == 2
), "`topk_weights` should be in shape (num_tokens, topk)"
_, topk = topk_weights.shape
assert (
topk == 1
), "Only support topk=1 when `apply_router_weight_on_input` is True"
x = x * topk_weights.to(x.dtype)
topk_weights = torch.ones_like(
topk_weights, dtype=torch.float32
) # topk_weights must be FP32 (float32)
return ck_moe_2stages( return ck_moe_2stages(
x, x,
layer.w13_weight, layer.w13_weight,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment