Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1fa1e53a
Unverified
Commit
1fa1e53a
authored
Mar 20, 2026
by
Simon Mo
Committed by
GitHub
Mar 20, 2026
Browse files
Revert "[compile] Initialize passes at VllmBackend init" (#37733)
parent
3ffa5200
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
19 deletions
+5
-19
tests/test_config.py
tests/test_config.py
+2
-2
vllm/compilation/backends.py
vllm/compilation/backends.py
+3
-12
vllm/compilation/decorators.py
vllm/compilation/decorators.py
+0
-5
No files found.
tests/test_config.py
View file @
1fa1e53a
...
...
@@ -32,9 +32,9 @@ from vllm.platforms import current_platform
def
test_compile_config_repr_succeeds
():
# setup: VllmBackend mutates the config object
# Note: VllmBackend.__init__ already calls configure_post_pass()
config
=
VllmConfig
()
_
=
VllmBackend
(
config
)
backend
=
VllmBackend
(
config
)
backend
.
configure_post_pass
()
# test that repr(config) succeeds
val
=
repr
(
config
)
...
...
vllm/compilation/backends.py
View file @
1fa1e53a
...
...
@@ -836,18 +836,8 @@ class VllmBackend:
# in future we need PostGradPassManager.uuid() to be executed
# only at compile time.
self
.
inductor_config
=
deepcopy
(
self
.
compilation_config
.
inductor_compile_config
)
# Configure post-grad passes (including AllReduceFusionPass) during
# backend init rather than at torch.compile time, so that expensive
# one-time setup (e.g. FlashInfer workspace allocation) is not
# attributed to compilation latency.
start
=
time
.
time
()
self
.
configure_post_pass
()
logger
.
info_once
(
"Post-grad pass configuration time: %.2f s"
,
time
.
time
()
-
start
,
scope
=
"local"
,
)
# `torch.compile` is JIT compiled, so we don't need to
# do anything here
def
collect_standalone_compile_artifacts
(
self
,
...
...
@@ -1128,6 +1118,7 @@ class VllmBackend:
assert
not
self
.
_called
,
"VllmBackend can only be called once"
self
.
graph
=
graph
self
.
configure_post_pass
()
if
self
.
compilation_config
.
use_inductor_graph_partition
:
# Let Inductor decide partitioning; avoid FX-level pre-splitting.
...
...
vllm/compilation/decorators.py
View file @
1fa1e53a
...
...
@@ -380,11 +380,6 @@ def _support_torch_compile(
compilation_counter
.
num_models_seen
+=
1
self
.
compiled
=
False
# Skip if a parent class's @support_torch_compile already
# initialized the compile wrapper
if
hasattr
(
self
,
"_compiled_callable"
):
return
# Handled by monkeypatching `TorchCompileWithNoGuardsWrapper` into base class
TorchCompileWithNoGuardsWrapper
.
__init__
(
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment