Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bdd8981d
Unverified
Commit
bdd8981d
authored
Mar 10, 2026
by
Zhengxu Chen
Committed by
GitHub
Mar 10, 2026
Browse files
[compile] Apply stored functorch config while finalizing loaded artifacts. (#36582)
Signed-off-by:
zhxchen17
<
zhxchen17@fb.com
>
parent
f088a831
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
25 deletions
+15
-25
vllm/compilation/caching.py
vllm/compilation/caching.py
+7
-1
vllm/compilation/piecewise_backend.py
vllm/compilation/piecewise_backend.py
+8
-24
No files found.
vllm/compilation/caching.py
View file @
bdd8981d
...
...
@@ -369,8 +369,14 @@ class VllmSerializableFunction(SerializableCallable): # type: ignore[misc]
from
vllm.compilation.backends
import
VllmBackend
saved_aot_autograd_config
=
self
.
aot_autograd_config
if
saved_aot_autograd_config
is
not
None
:
functorch_ctx
=
torch
.
_functorch
.
config
.
patch
(
saved_aot_autograd_config
)
else
:
functorch_ctx
=
contextlib
.
nullcontext
()
vllm_backend
=
VllmBackend
(
vllm_config
,
self
.
prefix
,
self
.
is_encoder
)
with
tracing
(
TracingContext
(
self
.
_fake_mode
)):
with
tracing
(
TracingContext
(
self
.
_fake_mode
))
,
functorch_ctx
:
result
=
vllm_backend
(
self
.
graph_module
,
list
(
self
.
example_inputs
))
self
.
optimized_call
=
result
.
optimized_call
self
.
vllm_backend
=
vllm_backend
...
...
vllm/compilation/piecewise_backend.py
View file @
bdd8981d
...
...
@@ -258,31 +258,15 @@ class PiecewiseBackend:
else
:
args_list
=
get_fake_args_from_graph
(
self
.
graph
)
# TODO(https://github.com/vllm-project/vllm/issues/35766)
# Can we remove strict_autograd_cache and
# force_non_lazy_backward_lowering overrides?
# I added them explicitly because this is what they are
# set to before the refactor
# (https://github.com/vllm-project/vllm/pull/35472).
# They affect the aotautograd cache key computation
# but they shouldn't have any effect on the actual
# compilation.
config_patches
=
dict
(
bundled_autograd_cache
=
True
,
strict_autograd_cache
=
False
,
range_entry
.
runnable
=
self
.
vllm_backend
.
compiler_manager
.
compile
(
self
.
graph
,
args_list
,
self
.
vllm_backend
.
inductor_config
,
self
.
compilation_config
,
compile_range
=
range_entry
.
compile_range
,
graph_index
=
self
.
piecewise_compile_index
,
num_graphs
=
self
.
total_piecewise_compiles
,
)
if
hasattr
(
torch
.
_functorch
.
config
,
"force_non_lazy_backward_lowering"
):
config_patches
[
"force_non_lazy_backward_lowering"
]
=
False
with
torch
.
_functorch
.
config
.
patch
(
**
config_patches
):
range_entry
.
runnable
=
self
.
vllm_backend
.
compiler_manager
.
compile
(
self
.
graph
,
args_list
,
self
.
vllm_backend
.
inductor_config
,
self
.
compilation_config
,
compile_range
=
range_entry
.
compile_range
,
graph_index
=
self
.
piecewise_compile_index
,
num_graphs
=
self
.
total_piecewise_compiles
,
)
range_entry
.
compiled
=
True
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment