"configs/vscode:/vscode.git/clone" did not exist on "63e10e00f1d690f134342ea32fbe0af1645ccb0b"
Unverified commit 47e12e08, authored by b8zhong and committed by GitHub
Browse files

Enable Llama 4 + TRTLLM MHA (#12003)

parent 823b4429
......@@ -966,7 +966,13 @@ class ServerArgs:
"fa3",
"aiter",
"triton",
}, "fa3, aiter, or triton is required for Llama4 model"
"trtllm_mha",
}, "fa3, aiter, triton, or trtllm_mha is required for Llama4 model"
if is_sm100_supported() and self.attention_backend is None:
self.attention_backend = "trtllm_mha"
logger.warning(
"Use trtllm_mha as attention backend on sm100 for Llama4 model"
)
elif model_arch in [
"Gemma2ForCausalLM",
"Gemma3ForCausalLM",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment