Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e6ae4b1b
Unverified
Commit
e6ae4b1b
authored
Mar 16, 2026
by
Zhengxu Chen
Committed by
GitHub
Mar 16, 2026
Browse files
[compile] Enable mega aot artifact for torch 2.12+. (#37198)
Signed-off-by:
zhxchen17
<
zhxchen17@fb.com
>
parent
2dccb38f
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
12 deletions
+15
-12
vllm/compilation/caching.py
vllm/compilation/caching.py
+4
-8
vllm/envs.py
vllm/envs.py
+11
-4
No files found.
vllm/compilation/caching.py
View file @
e6ae4b1b
...
...
@@ -307,13 +307,6 @@ class VllmSerializableFunction(SerializableCallable): # type: ignore[misc]
num_submods
=
len
(
submod_names
)
num_artifacts
=
standalone_compile_artifacts
.
num_artifacts
()
logger
.
info
(
"reconstructing serializable fn from standalone compile "
"artifacts. num_artifacts=%d num_submods=%d"
,
num_artifacts
,
num_submods
,
)
with
functorch_ctx
:
fn
=
reconstruct_serializable_fn_from_mega_artifact
(
state
=
state
,
...
...
@@ -324,7 +317,10 @@ class VllmSerializableFunction(SerializableCallable): # type: ignore[misc]
)
logger
.
info
(
"reconstructed serializable fn from standalone compile artifacts"
"reconstructed serializable fn from standalone compile "
"artifacts. num_artifacts=%d num_submods=%d"
,
num_artifacts
,
num_submods
,
)
return
fn
...
...
vllm/envs.py
View file @
e6ae4b1b
...
...
@@ -296,6 +296,16 @@ def use_aot_compile() -> bool:
)
def use_mega_aot_artifact():
    """Report whether the mega AOT artifact path is switched on.

    The ``VLLM_USE_MEGA_AOT_ARTIFACT`` environment variable decides the
    result: the function returns True only when its value is exactly "1".
    When the variable is not set, the fallback is "1" if
    ``is_torch_equal_or_newer("2.12.0.dev")`` holds and ``use_aot_compile()``
    is true, and "0" otherwise.
    """
    # Imported at call time rather than at module level
    # (presumably to keep this module importable without torch -- confirm).
    from vllm.utils.torch_utils import is_torch_equal_or_newer

    # The fallback is always computed, even when the variable is set
    # explicitly, so both checks run on every call.
    if is_torch_equal_or_newer("2.12.0.dev") and use_aot_compile():
        fallback = "1"
    else:
        fallback = "0"

    setting = os.environ.get("VLLM_USE_MEGA_AOT_ARTIFACT", fallback)
    return setting == "1"
def
env_with_choices
(
env_name
:
str
,
default
:
str
|
None
,
...
...
@@ -616,10 +626,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Enable loading compiled models directly from cached standalone compile artifacts
# without re-splitting graph modules. This reduces overhead during model
# loading by using reconstruct_serializable_fn_from_mega_artifact.
"VLLM_USE_MEGA_AOT_ARTIFACT"
:
lambda
:
os
.
environ
.
get
(
"VLLM_USE_MEGA_AOT_ARTIFACT"
,
"0"
)
==
"1"
,
"VLLM_USE_MEGA_AOT_ARTIFACT"
:
use_mega_aot_artifact
,
# local rank of the process in the distributed setting, used to determine
# the GPU device id
"LOCAL_RANK"
:
lambda
:
int
(
os
.
environ
.
get
(
"LOCAL_RANK"
,
"0"
)),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment