Unverified commit b6f01bd9, authored by Yizhou, committed by GitHub
Browse files

refactor: abstract graph mode support into platform interface (#25161)


Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
parent 4cf71cc8
......@@ -503,7 +503,7 @@ class VllmConfig:
if self.compilation_config.pass_config.enable_sequence_parallelism:
self.compilation_config.custom_ops.append("+rms_norm")
if current_platform.is_cuda_alike() or current_platform.is_xpu():
if current_platform.support_static_graph_mode():
# if cudagraph_mode is not explicitly set by users, set default
# value
if self.compilation_config.cudagraph_mode is None:
......
......@@ -498,6 +498,10 @@ class CudaPlatformBase(Platform):
def support_hybrid_kv_cache(cls) -> bool:
"""
Returns True unconditionally for this platform class.

NOTE(review): presumably signals that the hybrid KV cache is supported
on CUDA platforms -- confirm against the base `Platform` docstring.
"""
return True
@classmethod
def support_static_graph_mode(cls) -> bool:
"""
Returns True unconditionally: CudaPlatformBase reports static graph
mode as supported, overriding the base `Platform` default of False.
"""
return True
# NVML utils
# Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
......
......@@ -587,6 +587,13 @@ class Platform:
"""
return False
@classmethod
def support_static_graph_mode(cls) -> bool:
"""
Returns whether static graph mode is supported by the current platform.

The base implementation returns False; platform subclasses that support
static graph mode override this method to return True.
"""
return False
@classmethod
def use_sync_weight_loader(cls) -> bool:
"""
......
......@@ -477,3 +477,7 @@ class RocmPlatform(Platform):
@classmethod
def support_hybrid_kv_cache(cls) -> bool:
"""
Returns True unconditionally for RocmPlatform.

NOTE(review): presumably signals that the hybrid KV cache is supported
on ROCm -- confirm against the base `Platform` docstring.
"""
return True
@classmethod
def support_static_graph_mode(cls) -> bool:
"""
Returns True unconditionally: RocmPlatform reports static graph mode
as supported, overriding the base `Platform` default of False.
"""
return True
......@@ -113,12 +113,9 @@ class XPUPlatform(Platform):
# lazy import to avoid circular import
from vllm.config import CompilationLevel, CUDAGraphMode
compilation_config = vllm_config.compilation_config
if compilation_config.cudagraph_mode is None or \
compilation_config.cudagraph_mode.max_cudagraph_mode() \
!= CUDAGraphMode.NONE:
logger.info("[XPU] CUDA graph is not supported on XPU, disabling "
"cudagraphs. Fallback to cudagraph_mode=NONE")
compilation_config.cudagraph_mode = CUDAGraphMode.NONE
assert compilation_config.cudagraph_mode == CUDAGraphMode.NONE, \
"CUDA graph mode should be NONE on XPU"
if vllm_config.lora_config is not None:
compilation_config.level = CompilationLevel.NO_COMPILATION
......@@ -169,6 +166,10 @@ class XPUPlatform(Platform):
def support_hybrid_kv_cache(cls) -> bool:
"""
Returns True unconditionally for XPUPlatform.

NOTE(review): presumably signals that the hybrid KV cache is supported
on XPU -- confirm against the base `Platform` docstring.
"""
return True
@classmethod
def support_static_graph_mode(cls) -> bool:
"""
Returns False unconditionally: XPUPlatform reports static graph mode
as unsupported. This is spelled out explicitly even though it matches
the base `Platform` default.
"""
return False
@classmethod
def is_pin_memory_available(cls):
"""
Returns True unconditionally for XPUPlatform.

NOTE(review): presumably indicates that pinned host memory can be used
on XPU -- confirm against the base `Platform` docstring.
"""
return True
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment