[Chore] Update more locations to use `attention_config.backend` (#31153)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Chore] Update more locations to use `attention_config.backend` (#31153)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
8cef1376 · Cyrus Leung · GitHub · a37328fc · 8cef1376 · 8cef1376
Unverified commit 8cef1376, authored Dec 23, 2025 by Cyrus Leung; committed by GitHub on Dec 22, 2025
Showing 2 changed files with 3 additions and 2 deletions

benchmarks/benchmark_batch_invariance.py +1 -1

tests/compile/distributed/test_fusions_e2e.py +2 -1

No files found.
--- a/benchmarks/benchmark_batch_invariance.py
+++ b/benchmarks/benchmark_batch_invariance.py
@@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant(
    random.seed(seed)
    # Set environment variables
-    os.environ["VLLM_ATTENTION_BACKEND"] = backend
    if batch_invariant:
        os.environ["VLLM_BATCH_INVARIANT"] = "1"
    else:
@@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant(
            max_model_len=max_model_len,
            dtype="bfloat16",
            tensor_parallel_size=tp_size,
+            attention_config={"backend": backend},
            enable_prefix_caching=False,
        )
        init_time = time.perf_counter() - start_init

--- a/tests/compile/distributed/test_fusions_e2e.py
+++ b/tests/compile/distributed/test_fusions_e2e.py
@@ -557,7 +557,8 @@ def test_rms_group_quant(
    # To capture subprocess logs, we need to know whether spawn or fork is used.
    # Force spawn as it is more general.
    monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
-    monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend.name)
+    model_kwargs["attention_config"] = {"backend": backend.name}
    compilation_config = CompilationConfig(
        # Testing properties