Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2e089b96
Unverified
Commit
2e089b96
authored
Mar 20, 2026
by
Zhengxu Chen
Committed by
GitHub
Mar 20, 2026
Browse files
[compile] Add compiled artifact counter for VLLM_USE_MEGA_AOT_ARTIFACT=1. (#37589)
Signed-off-by:
zhxchen17
<
zhxchen17@fb.com
>
parent
880be2b1
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
2 deletions
+17
-2
tests/compile/test_startup.py
tests/compile/test_startup.py
+14
-2
vllm/compilation/caching.py
vllm/compilation/caching.py
+3
-0
No files found.
tests/compile/test_startup.py
View file @
2e089b96
...
...
@@ -9,11 +9,15 @@ then runs in the parent with clean in-memory state but populated caches.
import
multiprocessing
as
mp
import
pytest
from
torch._dynamo.utils
import
counters
import
vllm.envs
as
envs
from
vllm.compilation.counter
import
compilation_counter
from
vllm.config
import
CompilationConfig
,
CompilationMode
,
CUDAGraphMode
from
..utils
import
fork_new_process_for_each_test
MODEL
=
"microsoft/Phi-tiny-MoE-instruct"
...
...
@@ -45,8 +49,11 @@ def _cold_start(vllm_runner):
assert
counters
[
"aot_autograd"
][
"autograd_cache_hit"
]
==
0
def
test_moe_startup
(
monkeypatch
,
vllm_runner
,
fresh_vllm_cache
):
@
fork_new_process_for_each_test
@
pytest
.
mark
.
parametrize
(
"mega_aot_artifact"
,
[
"0"
,
"1"
])
def
test_moe_startup
(
monkeypatch
,
vllm_runner
,
fresh_vllm_cache
,
mega_aot_artifact
):
monkeypatch
.
setenv
(
"VLLM_ENABLE_V1_MULTIPROCESSING"
,
"0"
)
monkeypatch
.
setenv
(
"VLLM_USE_MEGA_AOT_ARTIFACT"
,
mega_aot_artifact
)
# Cold start in a forked child (must fork before CUDA init).
# This model has 32 identical transformer layers which produce
...
...
@@ -64,7 +71,12 @@ def test_moe_startup(monkeypatch, vllm_runner, fresh_vllm_cache):
num_compiled_artifacts_saved
=
0
,
):
_run_vllm
(
vllm_runner
)
assert
counters
[
"aot_autograd"
][
"total"
]
==
30
if
envs
.
VLLM_USE_MEGA_AOT_ARTIFACT
:
# MEGA_AOT_ARTIFACT is enabled, so we expect no aot_autograd running on
# subgraphs.
assert
counters
[
"aot_autograd"
][
"total"
]
==
0
else
:
assert
counters
[
"aot_autograd"
][
"total"
]
==
30
assert
counters
[
"aot_autograd"
][
"autograd_cache_miss"
]
==
0
assert
(
counters
[
"aot_autograd"
][
"autograd_cache_hit"
]
==
0
...
...
vllm/compilation/caching.py
View file @
2e089b96
...
...
@@ -17,6 +17,7 @@ from torch.utils import _pytree as pytree
import
vllm.envs
as
envs
from
vllm.compilation.compiler_interface
import
get_inductor_factors
from
vllm.compilation.counter
import
compilation_counter
from
vllm.config
import
VllmConfig
,
get_current_vllm_config
from
vllm.config.utils
import
hash_factors
from
vllm.logger
import
init_logger
...
...
@@ -61,6 +62,7 @@ class StandaloneCompiledArtifacts:
self
.
submodule_bytes
[
f
"
{
submod_name
}
_
{
shape
}
"
]
=
hex_digest
if
hex_digest
not
in
self
.
submodule_bytes_store
:
self
.
submodule_bytes_store
[
hex_digest
]
=
entry
compilation_counter
.
num_compiled_artifacts_saved
+=
1
logger
.
debug
(
"inserting new artifact for submod %s with shape %s "
"(%s bytes) at hash %s"
,
...
...
@@ -124,6 +126,7 @@ class StandaloneCompiledArtifacts:
def
_load_entry
(
entry_bytes
:
bytes
)
->
AOTCompiledArtifact
:
entry
=
pickle
.
loads
(
entry_bytes
)
compilation_counter
.
num_compiled_artifacts_loaded
+=
1
return
AOTCompiledArtifact
.
deserialize
(
entry
)
with
concurrent
.
futures
.
ThreadPoolExecutor
()
as
executor
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment