Unverified commit 5206ab20, authored by Kunshang Ji, committed by GitHub
Browse files

[XPU] Fix circular import error. (#24927)


Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
parent 0af3ce13
...@@ -9,7 +9,6 @@ import torch ...@@ -9,7 +9,6 @@ import torch
import vllm.envs as envs import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import DEFAULT_MAX_NUM_BATCHED_TOKENS from vllm.utils import DEFAULT_MAX_NUM_BATCHED_TOKENS
from vllm.v1.attention.backends.utils import set_kv_cache_layout
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
...@@ -164,11 +163,16 @@ class XPUPlatform(Platform): ...@@ -164,11 +163,16 @@ class XPUPlatform(Platform):
vllm_config.scheduler_config.max_num_batched_tokens = max( vllm_config.scheduler_config.max_num_batched_tokens = max(
vllm_config.scheduler_config.max_model_len, vllm_config.scheduler_config.max_model_len,
DEFAULT_MAX_NUM_BATCHED_TOKENS) DEFAULT_MAX_NUM_BATCHED_TOKENS)
from vllm.v1.attention.backends.utils import set_kv_cache_layout
set_kv_cache_layout("NHD") set_kv_cache_layout("NHD")
logger.info("Setting VLLM_KV_CACHE_LAYOUT to 'NHD' for XPU; " logger.info("Setting VLLM_KV_CACHE_LAYOUT to 'NHD' for XPU; "
"only NHD layout is supported by XPU attention kernels.") "only NHD layout is supported by XPU attention kernels.")
@classmethod
def support_hybrid_kv_cache(cls) -> bool:
return True
@classmethod @classmethod
def is_pin_memory_available(cls): def is_pin_memory_available(cls):
return True return True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment