[CI/Build] Don't add FLASHINFER backend in test_cpu_offloading.py (#29229)

Signed-off-by: Randall Smith <ransmith@amd.com> Co-authored-by: Randall Smith <ransmith@amd.com>

[CI/Build] Don't add FLASHINFER backend in test_cpu_offloading.py (#29229)
Signed-off-by: Randall Smith <ransmith@amd.com> Co-authored-by: Randall Smith <ransmith@amd.com>
8e22da1d · rasmith · GitHub · a4fdf240 · 8e22da1d
Unverified Commit 8e22da1d authored Nov 22, 2025 by rasmith Committed by GitHub Nov 22, 2025
Show whitespace changes
Inline Side-by-side

Showing with 5 additions and 1 deletion

tests/v1/kv_offload/test_cpu_offloading.py tests/v1/kv_offload/test_cpu_offloading.py +5 -1

No files found.
--- a/tests/v1/kv_offload/test_cpu_offloading.py
+++ b/tests/v1/kv_offload/test_cpu_offloading.py
@@ -12,10 +12,14 @@ from tqdm import tqdm
 from vllm import LLM, SamplingParams, TokensPrompt
 from vllm.config import KVEventsConfig, KVTransferConfig
 from vllm.distributed.kv_events import BlockStored, KVEventBatch
+from vllm.platforms import current_platform
 from vllm.utils.system_utils import set_env_var
 CPU_BLOCK_SIZES = [48]
-ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER"]
+ATTN_BACKENDS = ["FLASH_ATTN"]
+if current_platform.is_cuda():
+    ATTN_BACKENDS.append("FLASHINFER")
 class MockSubscriber: