[KV offload] Enable CPU KV offload on CUDA-alike platforms (#27770)

Signed-off-by: zhewenli <zhewenli@meta.com>

[KV offload] Enable CPU KV offload on CUDA-alike platforms (#27770)
Signed-off-by: zhewenli <zhewenli@meta.com>
0fe01404 · Zhewen Li · GitHub · 4e68cc9b · 0fe01404 · 0fe01404
Unverified commit 0fe01404, authored Oct 30, 2025 by Zhewen Li; committed by GitHub on Oct 30, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 6 deletions

tests/v1/kv_offload/test_cpu_offloading.py +0 -4

vllm/v1/kv_offload/cpu.py +2 -2

No files found.
--- a/tests/v1/kv_offload/test_cpu_offloading.py
+++ b/tests/v1/kv_offload/test_cpu_offloading.py
@@ -12,7 +12,6 @@ from tqdm import tqdm
 from vllm import LLM, SamplingParams, TokensPrompt
 from vllm.config import KVEventsConfig, KVTransferConfig
 from vllm.distributed.kv_events import BlockStored, KVEventBatch
-from vllm.platforms import current_platform

 CPU_BLOCK_SIZES = [16, 48]

@@ -64,9 +63,6 @@ class MockSubscriber:
        self.sub.close()


-@pytest.mark.skipif(
-    not current_platform.is_cuda(), reason="CPU offloading only supported on CUDA"
-)
 @pytest.mark.parametrize("cpu_block_size", CPU_BLOCK_SIZES)
 def test_cpu_offloading(cpu_block_size: int) -> None:
    """

--- a/vllm/v1/kv_offload/cpu.py
+++ b/vllm/v1/kv_offload/cpu.py
@@ -51,9 +51,9 @@ class CPUOffloadingSpec(OffloadingSpec):
        self, kv_caches: dict[str, torch.Tensor]
    ) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], OffloadingHandler]]:
        if not self._handler:
-            if not current_platform.is_cuda():
+            if not current_platform.is_cuda_alike():
                raise Exception(
-                    "CPU Offloading is currently only supported on CUDA GPUs"
+                    "CPU Offloading is currently only supported on CUDA-alike GPUs"
                )

            layer_names = list(kv_caches.keys())