Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6ae8bbd0
Unverified
Commit
6ae8bbd0
authored
Mar 26, 2026
by
Kunshang Ji
Committed by
GitHub
Mar 26, 2026
Browse files
[XPU] Disable xpu graph by default (#38193)
Signed-off-by:
Kunshang Ji
<
kunshang.ji@intel.com
>
parent
a9213c0f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
0 deletions
+12
-0
vllm/envs.py
vllm/envs.py
+5
-0
vllm/platforms/xpu.py
vllm/platforms/xpu.py
+7
-0
No files found.
vllm/envs.py
View file @
6ae8bbd0
...
@@ -247,6 +247,7 @@ if TYPE_CHECKING:
...
@@ -247,6 +247,7 @@ if TYPE_CHECKING:
VLLM_ELASTIC_EP_DRAIN_REQUESTS
:
bool
=
False
VLLM_ELASTIC_EP_DRAIN_REQUESTS
:
bool
=
False
VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS
:
bool
=
False
VLLM_MEMORY_PROFILER_ESTIMATE_CUDAGRAPHS
:
bool
=
False
VLLM_NIXL_EP_MAX_NUM_RANKS
:
int
=
32
VLLM_NIXL_EP_MAX_NUM_RANKS
:
int
=
32
VLLM_XPU_ENABLE_XPU_GRAPH
:
bool
=
False
def
get_default_cache_root
():
def
get_default_cache_root
():
...
@@ -1648,6 +1649,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -1648,6 +1649,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_NIXL_EP_MAX_NUM_RANKS"
:
lambda
:
int
(
"VLLM_NIXL_EP_MAX_NUM_RANKS"
:
lambda
:
int
(
os
.
getenv
(
"VLLM_NIXL_EP_MAX_NUM_RANKS"
,
"32"
)
os
.
getenv
(
"VLLM_NIXL_EP_MAX_NUM_RANKS"
,
"32"
)
),
),
# Whether enable XPU graph on Intel GPU
"VLLM_XPU_ENABLE_XPU_GRAPH"
:
lambda
:
bool
(
int
(
os
.
getenv
(
"VLLM_XPU_ENABLE_XPU_GRAPH"
,
"0"
))
),
}
}
...
...
vllm/platforms/xpu.py
View file @
6ae8bbd0
...
@@ -12,6 +12,7 @@ import vllm_xpu_kernels._C # noqa
...
@@ -12,6 +12,7 @@ import vllm_xpu_kernels._C # noqa
import
vllm_xpu_kernels._moe_C
# noqa
import
vllm_xpu_kernels._moe_C
# noqa
import
vllm_xpu_kernels._xpu_C
# noqa
import
vllm_xpu_kernels._xpu_C
# noqa
import
vllm.envs
as
envs
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.utils.torch_utils
import
supports_xpu_graph
from
vllm.utils.torch_utils
import
supports_xpu_graph
from
vllm.v1.attention.backends.registry
import
AttentionBackendEnum
from
vllm.v1.attention.backends.registry
import
AttentionBackendEnum
...
@@ -181,6 +182,12 @@ class XPUPlatform(Platform):
...
@@ -181,6 +182,12 @@ class XPUPlatform(Platform):
"XPU Graph is not supported in the current PyTorch version, "
"XPU Graph is not supported in the current PyTorch version, "
"disabling cudagraph_mode."
"disabling cudagraph_mode."
)
)
elif
not
envs
.
VLLM_XPU_ENABLE_XPU_GRAPH
:
compilation_config
.
cudagraph_mode
=
CUDAGraphMode
.
NONE
logger
.
warning
(
"XPU Graph is disabled by environment variable, "
"please set VLLM_XPU_ENABLE_XPU_GRAPH=1 to enable it."
)
elif
parallel_config
.
world_size_across_dp
>
1
:
elif
parallel_config
.
world_size_across_dp
>
1
:
compilation_config
.
cudagraph_mode
=
CUDAGraphMode
.
NONE
compilation_config
.
cudagraph_mode
=
CUDAGraphMode
.
NONE
logger
.
warning
(
logger
.
warning
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment