Unverified Commit 8e22da1d authored by rasmith, committed by GitHub
Browse files

[CI/Build] Don't add FLASHINFER backend in test_cpu_offloading.py (#29229)


Signed-off-by: Randall Smith <ransmith@amd.com>
Co-authored-by: Randall Smith <ransmith@amd.com>
parent a4fdf240
...@@ -12,10 +12,14 @@ from tqdm import tqdm ...@@ -12,10 +12,14 @@ from tqdm import tqdm
from vllm import LLM, SamplingParams, TokensPrompt from vllm import LLM, SamplingParams, TokensPrompt
from vllm.config import KVEventsConfig, KVTransferConfig from vllm.config import KVEventsConfig, KVTransferConfig
from vllm.distributed.kv_events import BlockStored, KVEventBatch from vllm.distributed.kv_events import BlockStored, KVEventBatch
from vllm.platforms import current_platform
from vllm.utils.system_utils import set_env_var from vllm.utils.system_utils import set_env_var
CPU_BLOCK_SIZES = [48] CPU_BLOCK_SIZES = [48]
ATTN_BACKENDS = ["FLASH_ATTN", "FLASHINFER"] ATTN_BACKENDS = ["FLASH_ATTN"]
if current_platform.is_cuda():
ATTN_BACKENDS.append("FLASHINFER")
class MockSubscriber: class MockSubscriber:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment