"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "e20c92bb618384ce8d0013e0c9ad273d0c23d65b"
Unverified commit 6ae8bbd0, authored by Kunshang Ji, committed by GitHub
Browse files

[XPU] Disable xpu graph by default (#38193)


Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
parent a9213c0f
...@@ -247,6 +247,7 @@ if TYPE_CHECKING: ...@@ -247,6 +247,7 @@ if TYPE_CHECKING:
VLLM_ELASTIC_EP_DRAIN_REQUESTS: bool = False VLLM_ELASTIC_EP_DRAIN_REQUESTS: bool = False
VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS: bool = False VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS: bool = False
VLLM_NIXL_EP_MAX_NUM_RANKS: int = 32 VLLM_NIXL_EP_MAX_NUM_RANKS: int = 32
VLLM_XPU_ENABLE_XPU_GRAPH: bool = False
def get_default_cache_root(): def get_default_cache_root():
...@@ -1648,6 +1649,10 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1648,6 +1649,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_NIXL_EP_MAX_NUM_RANKS": lambda: int( "VLLM_NIXL_EP_MAX_NUM_RANKS": lambda: int(
os.getenv("VLLM_NIXL_EP_MAX_NUM_RANKS", "32") os.getenv("VLLM_NIXL_EP_MAX_NUM_RANKS", "32")
), ),
# Whether to enable XPU graph on Intel GPU
"VLLM_XPU_ENABLE_XPU_GRAPH": lambda: bool(
int(os.getenv("VLLM_XPU_ENABLE_XPU_GRAPH", "0"))
),
} }
......
...@@ -12,6 +12,7 @@ import vllm_xpu_kernels._C # noqa ...@@ -12,6 +12,7 @@ import vllm_xpu_kernels._C # noqa
import vllm_xpu_kernels._moe_C # noqa import vllm_xpu_kernels._moe_C # noqa
import vllm_xpu_kernels._xpu_C # noqa import vllm_xpu_kernels._xpu_C # noqa
import vllm.envs as envs
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils.torch_utils import supports_xpu_graph from vllm.utils.torch_utils import supports_xpu_graph
from vllm.v1.attention.backends.registry import AttentionBackendEnum from vllm.v1.attention.backends.registry import AttentionBackendEnum
...@@ -181,6 +182,12 @@ class XPUPlatform(Platform): ...@@ -181,6 +182,12 @@ class XPUPlatform(Platform):
"XPU Graph is not supported in the current PyTorch version, " "XPU Graph is not supported in the current PyTorch version, "
"disabling cudagraph_mode." "disabling cudagraph_mode."
) )
elif not envs.VLLM_XPU_ENABLE_XPU_GRAPH:
compilation_config.cudagraph_mode = CUDAGraphMode.NONE
logger.warning(
"XPU Graph is disabled by environment variable, "
"please set VLLM_XPU_ENABLE_XPU_GRAPH=1 to enable it."
)
elif parallel_config.world_size_across_dp > 1: elif parallel_config.world_size_across_dp > 1:
compilation_config.cudagraph_mode = CUDAGraphMode.NONE compilation_config.cudagraph_mode = CUDAGraphMode.NONE
logger.warning( logger.warning(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment