Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
dd6dbd93
Unverified
Commit
dd6dbd93
authored
Mar 04, 2026
by
Zhengxu Chen
Committed by
GitHub
Mar 05, 2026
Browse files
[compile] Fix extra cache save on warm start. (#35921)
Signed-off-by:
zhxchen17
<
zhxchen17@fb.com
>
parent
26366009
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
6 deletions
+27
-6
tests/compile/test_startup.py
tests/compile/test_startup.py
+4
-4
vllm/compilation/backends.py
vllm/compilation/backends.py
+23
-2
No files found.
tests/compile/test_startup.py
View file @
dd6dbd93
...
...
@@ -61,11 +61,11 @@ def test_moe_startup(monkeypatch, vllm_runner, fresh_vllm_cache):
counters
.
clear
()
with
compilation_counter
.
expect
(
num_compiled_artifacts_loaded
=
3
,
# TODO: warm start should not save any artifacts
# https://github.com/vllm-project/vllm/issues/35708
num_compiled_artifacts_saved
=
1
,
num_compiled_artifacts_saved
=
0
,
):
_run_vllm
(
vllm_runner
)
assert
counters
[
"aot_autograd"
][
"total"
]
==
30
assert
counters
[
"aot_autograd"
][
"autograd_cache_miss"
]
==
0
assert
counters
[
"aot_autograd"
][
"autograd_cache_hit"
]
==
1
assert
(
counters
[
"aot_autograd"
][
"autograd_cache_hit"
]
==
0
)
# No miss at aot_autograd level causing disk I/O.
vllm/compilation/backends.py
View file @
dd6dbd93
...
...
@@ -221,10 +221,28 @@ class CompilerManager:
)
->
Callable
[...,
Any
]
|
None
:
if
(
compile_range
,
graph_index
,
self
.
compiler
.
name
)
not
in
self
.
cache
:
return
None
handle
=
self
.
cache
[(
compile_range
,
graph_index
,
self
.
compiler
.
name
)]
def
parse_value
(
value
:
Any
)
->
tuple
[
tuple
[
str
,
str
],
str
]:
assert
isinstance
(
value
,
dict
)
handle
=
value
[
"graph_handle"
]
assert
isinstance
(
handle
[
0
],
str
)
assert
isinstance
(
handle
[
1
],
str
)
cache_key
=
value
[
"cache_key"
]
return
handle
,
cache_key
try
:
handle
,
cache_key
=
parse_value
(
self
.
cache
[(
compile_range
,
graph_index
,
self
.
compiler
.
name
)]
)
except
Exception
:
# When the cache is outdated, we should ignore the existing file.
# This should cause the correct cache to be generated again.
return
None
compiled_graph
=
self
.
compiler
.
load
(
handle
,
graph
,
example_inputs
,
graph_index
,
compile_range
)
self
.
loaded_artifacts
[
cache_key
]
=
compiled_graph
logger
.
debug
(
"Directly load the %s-th graph for compile range %sfrom %s via handle %s"
,
graph_index
,
...
...
@@ -341,7 +359,10 @@ class CompilerManager:
# store the artifact in the cache
if
is_compile_cache_enabled
(
additional_inductor_config
)
and
handle
is
not
None
:
self
.
cache
[(
compile_range
,
graph_index
,
self
.
compiler
.
name
)]
=
handle
self
.
cache
[(
compile_range
,
graph_index
,
self
.
compiler
.
name
)]
=
{
"graph_handle"
:
handle
,
"cache_key"
:
cache_key
,
}
compilation_counter
.
num_cache_entries_updated
+=
1
self
.
is_cache_updated
=
True
if
graph_index
==
0
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment