Unverified commit 83a7c89c, authored by b8zhong, committed by GitHub
Browse files

followup fix for llama 4 trtllm flashinfer backend (#12314)

parent 0380ca82
...@@ -673,7 +673,7 @@ class ModelOptFp8MoEMethod(FusedMoEMethodBase): ...@@ -673,7 +673,7 @@ class ModelOptFp8MoEMethod(FusedMoEMethodBase):
routed_scaling_factor if routed_scaling_factor is not None else 1.0 routed_scaling_factor if routed_scaling_factor is not None else 1.0
), ),
use_routing_scales_on_input=use_routing_scales_on_input, use_routing_scales_on_input=use_routing_scales_on_input,
tile_tokens_dim=None, tile_tokens_dim=8, # TODO(brayden): use the FI tile calculation
routing_method_type=routing_method_type, routing_method_type=routing_method_type,
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment