Unverified commit 0fe01404, authored by Zhewen Li, committed by GitHub
Browse files

[KV offload] Enable CPU KV offload on CUDA alike Platforms (#27770)


Signed-off-by: zhewenli <zhewenli@meta.com>
parent 4e68cc9b
......@@ -12,7 +12,6 @@ from tqdm import tqdm
from vllm import LLM, SamplingParams, TokensPrompt
from vllm.config import KVEventsConfig, KVTransferConfig
from vllm.distributed.kv_events import BlockStored, KVEventBatch
from vllm.platforms import current_platform
CPU_BLOCK_SIZES = [16, 48]
......@@ -64,9 +63,6 @@ class MockSubscriber:
self.sub.close()
@pytest.mark.skipif(
not current_platform.is_cuda(), reason="CPU offloading only supported on CUDA"
)
@pytest.mark.parametrize("cpu_block_size", CPU_BLOCK_SIZES)
def test_cpu_offloading(cpu_block_size: int) -> None:
"""
......
......@@ -51,9 +51,9 @@ class CPUOffloadingSpec(OffloadingSpec):
self, kv_caches: dict[str, torch.Tensor]
) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], OffloadingHandler]]:
if not self._handler:
if not current_platform.is_cuda():
if not current_platform.is_cuda_alike():
raise Exception(
"CPU Offloading is currently only supported on CUDA GPUs"
"CPU Offloading is currently only supported on CUDA-alike GPUs"
)
layer_names = list(kv_caches.keys())
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment