Unverified commit 8ebf72fe authored by kk, committed by GitHub

[Fix] RuntimeError: get_cfg Unsupported input_type:Float4_e2m1fn_x2 when using aiter-mxfp4-moe (#10981)

Both aiter-backed MXFP4 MoE paths now view the packed weights as torch.float4_e2m1fn_x2 (when the running PyTorch build provides that dtype) before calling aiter's fused_moe.

Co-authored-by: wunhuang <wunhuang@amd.com>
parent 82605747
@@ -843,10 +843,18 @@ class Mxfp4DynamicQuantMoEMethod(FusedMoEMethodBase):
         topk_weights = topk_weights.to(
             torch.float32
         )  # aiter's moe_sorting requires topk_weights to be FP32
+        if hasattr(torch, "float4_e2m1fn_x2"):
+            w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
+            w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
+        else:
+            w13_weight = layer.w13_weight
+            w2_weight = layer.w2_weight
         output = fused_moe(
             x,
-            layer.w13_weight,
-            layer.w2_weight,
+            w13_weight,
+            w2_weight,
             topk_weights,
             topk_ids,
             quant_type=QuantType.per_1x32,
@@ -183,10 +183,17 @@ class QuarkW4A4MXFp4MoEMethod(QuarkMoEMethod):
         moe_runner_config = self.moe_runner_config
         topk_weights, topk_ids, _ = topk_output
+        if hasattr(torch, "float4_e2m1fn_x2"):
+            w13_weight = layer.w13_weight.view(torch.float4_e2m1fn_x2)
+            w2_weight = layer.w2_weight.view(torch.float4_e2m1fn_x2)
+        else:
+            w13_weight = layer.w13_weight
+            w2_weight = layer.w2_weight
         output = fused_moe(
             x,
-            layer.w13_weight,
-            layer.w2_weight,
+            w13_weight,
+            w2_weight,
             topk_weights,
             topk_ids,
             quant_type=QuantType.per_1x32,
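For context, here is a minimal standalone sketch of the hasattr-guarded dtype view that both hunks introduce. It is not part of the patch; the tensor name `packed` and its shape are illustrative assumptions. `torch.float4_e2m1fn_x2` only exists in recent PyTorch builds, which is why the guard is needed.

import torch

# Stand-in for a packed MXFP4 weight: two E2M1 (FP4) values per uint8 byte.
packed = torch.randint(0, 256, (128, 64), dtype=torch.uint8)

if hasattr(torch, "float4_e2m1fn_x2"):
    # Newer PyTorch builds expose the packed-FP4 dtype; view() reinterprets
    # the existing bytes in place without copying or converting any data.
    weight = packed.view(torch.float4_e2m1fn_x2)
else:
    # Older builds: fall back to passing the raw packed bytes through.
    weight = packed

print(weight.dtype, weight.shape)

Because the dtype has the same one-byte item size as uint8, the view changes only how downstream consumers (here, aiter's fused_moe) interpret the buffer, not its shape or contents.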