Unverified Commit fc42ff7b authored by DarkSharpness's avatar DarkSharpness Committed by GitHub
Browse files

[Fix] Fix wrong backend chosen in hybrid backend (#8989)

parent 7c0db868
......@@ -378,6 +378,12 @@ class ModelRunner:
)
server_args.attention_backend = "torch_native"
if server_args.prefill_attention_backend is not None and (
server_args.prefill_attention_backend
== server_args.decode_attention_backend
): # override the default attention backend
server_args.attention_backend = server_args.prefill_attention_backend
if server_args.attention_backend is None:
"""
Auto select the fastest attention backend.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment