Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
341eed3d
Unverified
Commit
341eed3d
authored
Feb 10, 2026
by
Richard Zou
Committed by
GitHub
Feb 10, 2026
Browse files
[torch.compile] Disable recursive pre_grad_passes (#34092)
Signed-off-by:
Richard Zou
<
zou3519@gmail.com
>
parent
6f2f59f2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
24 additions
and
1 deletion
+24
-1
vllm/compilation/compiler_interface.py
vllm/compilation/compiler_interface.py
+14
-1
vllm/envs.py
vllm/envs.py
+10
-0
No files found.
vllm/compilation/compiler_interface.py
View file @
341eed3d
...
@@ -257,7 +257,20 @@ class InductorStandaloneAdaptor(CompilerInterface):
...
@@ -257,7 +257,20 @@ class InductorStandaloneAdaptor(CompilerInterface):
if
use_aot
:
if
use_aot
:
compile_kwargs
[
"aot"
]
=
True
# type: ignore[assignment]
compile_kwargs
[
"aot"
]
=
True
# type: ignore[assignment]
compiled_graph
=
standalone_compile
(
graph
,
example_inputs
,
**
compile_kwargs
)
# Inductor's pre-grad passes don't do anything for vLLM.
# The pre-grad passes get run even on cache-hit and negatively impact
# vllm cold compile times by O(1s)
# Can remove this after the following issue gets fixed
# https://github.com/pytorch/pytorch/issues/174502
if
envs
.
VLLM_ENABLE_PREGRAD_PASSES
:
ctx
:
Any
=
contextlib
.
nullcontext
()
else
:
ctx
=
patch
(
"torch._inductor.compile_fx._recursive_pre_grad_passes"
,
lambda
gm
,
_
:
gm
,
)
with
ctx
:
compiled_graph
=
standalone_compile
(
graph
,
example_inputs
,
**
compile_kwargs
)
if
use_aot
:
if
use_aot
:
from
torch._inductor.standalone_compile
import
AOTCompiledArtifact
from
torch._inductor.standalone_compile
import
AOTCompiledArtifact
...
...
vllm/envs.py
View file @
341eed3d
...
@@ -132,6 +132,7 @@ if TYPE_CHECKING:
...
@@ -132,6 +132,7 @@ if TYPE_CHECKING:
VLLM_DP_RANK_LOCAL
:
int
=
-
1
VLLM_DP_RANK_LOCAL
:
int
=
-
1
VLLM_DP_SIZE
:
int
=
1
VLLM_DP_SIZE
:
int
=
1
VLLM_USE_STANDALONE_COMPILE
:
bool
=
True
VLLM_USE_STANDALONE_COMPILE
:
bool
=
True
VLLM_ENABLE_PREGRAD_PASSES
:
bool
=
False
VLLM_DP_MASTER_IP
:
str
=
""
VLLM_DP_MASTER_IP
:
str
=
""
VLLM_DP_MASTER_PORT
:
int
=
0
VLLM_DP_MASTER_PORT
:
int
=
0
VLLM_MOE_DP_CHUNK_SIZE
:
int
=
256
VLLM_MOE_DP_CHUNK_SIZE
:
int
=
256
...
@@ -568,6 +569,15 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -568,6 +569,15 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_USE_STANDALONE_COMPILE"
,
"1"
"VLLM_USE_STANDALONE_COMPILE"
,
"1"
)
)
==
"1"
,
==
"1"
,
# Inductor's pre-grad passes don't do anything for vLLM.
# The pre-grad passes get run even on cache-hit and negatively impact
# vllm cold compile times by O(1s)
# Can remove this after the following issue gets fixed
# https://github.com/pytorch/pytorch/issues/174502
"VLLM_ENABLE_PREGRAD_PASSES"
:
lambda
:
os
.
environ
.
get
(
"VLLM_ENABLE_PREGRAD_PASSES"
,
"0"
)
==
"1"
,
# Debug pattern matching inside custom passes.
# Debug pattern matching inside custom passes.
# Should be set to the fx.Node name (e.g. 'getitem_34' or 'scaled_mm_3').
# Should be set to the fx.Node name (e.g. 'getitem_34' or 'scaled_mm_3').
"VLLM_PATTERN_MATCH_DEBUG"
:
lambda
:
os
.
environ
.
get
(
"VLLM_PATTERN_MATCH_DEBUG"
:
lambda
:
os
.
environ
.
get
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment