Unverified Commit 5f2a473f authored by Andreas Karatzas, committed by GitHub
Browse files

[ROCm][CI] v1 cpu offloading attention backend fix (#31833)


Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent 6b2a672e
@@ -15,10 +15,12 @@ from vllm.distributed.kv_events import BlockStored, KVEventBatch
 from vllm.platforms import current_platform
 CPU_BLOCK_SIZES = [48]
-ATTN_BACKENDS = ["FLASH_ATTN", "TRITON_ATTN"]
+ATTN_BACKENDS = []
 if current_platform.is_cuda():
-    ATTN_BACKENDS.append("FLASHINFER")
+    ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER", "TRITON_ATTN"]
+elif current_platform.is_rocm():
+    ATTN_BACKENDS = ["TRITON_ATTN"]
 class MockSubscriber:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment