[ROCm][CI] v1 cpu offloading attention backend fix (#31833)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>

[ROCm][CI] v1 cpu offloading attention backend fix (#31833)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
5f2a473f · Andreas Karatzas · GitHub · 6b2a672e · 5f2a473f
Unverified commit 5f2a473f, authored Jan 08, 2026 by Andreas Karatzas; committed by GitHub on Jan 08, 2026.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 2 deletions

tests/v1/kv_offload/test_cpu_offloading.py +4 -2

No files found.
--- a/tests/v1/kv_offload/test_cpu_offloading.py
+++ b/tests/v1/kv_offload/test_cpu_offloading.py
@@ -15,10 +15,12 @@ from vllm.distributed.kv_events import BlockStored, KVEventBatch
 from vllm.platforms import current_platform

 CPU_BLOCK_SIZES = [48]
-ATTN_BACKENDS = ["FLASH_ATTN", "TRITON_ATTN"]
+ATTN_BACKENDS = []

 if current_platform.is_cuda():
-    ATTN_BACKENDS.append("FLASHINFER")
+    ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER", "TRITON_ATTN"]
+elif current_platform.is_rocm():
+    ATTN_BACKENDS = ["TRITON_ATTN"]


 class MockSubscriber: