[Fix] Fix wrong backend chosen in hybrid backend (#8989)

fc42ff7b · DarkSharpness · GitHub · 7c0db868 · fc42ff7b
Unverified Commit fc42ff7b authored Aug 08, 2025 by DarkSharpness Committed by GitHub Aug 08, 2025
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 0 deletions

python/sglang/srt/model_executor/model_runner.py python/sglang/srt/model_executor/model_runner.py +6 -0

No files found.
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -378,6 +378,12 @@ class ModelRunner:
            )
            server_args.attention_backend = "torch_native"
+        if server_args.prefill_attention_backend is not None and (
+            server_args.prefill_attention_backend
+            == server_args.decode_attention_backend
+        ):  # prefill and decode backends match: use that backend as the attention backend
+            server_args.attention_backend = server_args.prefill_attention_backend
        if server_args.attention_backend is None:
            """
            Auto select the fastest attention backend.