Unverified commit fc42ff7b, authored by DarkSharpness and committed by GitHub
Browse files

[Fix] Fix wrong backend chosen in hybrid backend (#8989)

parent 7c0db868
...@@ -378,6 +378,12 @@ class ModelRunner: ...@@ -378,6 +378,12 @@ class ModelRunner:
) )
server_args.attention_backend = "torch_native" server_args.attention_backend = "torch_native"
if server_args.prefill_attention_backend is not None and (
server_args.prefill_attention_backend
== server_args.decode_attention_backend
): # override the default attention backend
server_args.attention_backend = server_args.prefill_attention_backend
if server_args.attention_backend is None: if server_args.attention_backend is None:
""" """
Auto select the fastest attention backend. Auto select the fastest attention backend.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment