Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
89f572db
Unverified
Commit
89f572db
authored
Mar 24, 2026
by
Richard Zou
Committed by
GitHub
Mar 24, 2026
Browse files
[BugFix] fix VLLM_USE_STANDALONE_COMPILE=0 (#38015)
Signed-off-by:
Richard Zou
<
zou3519@gmail.com
>
parent
71a4a2fb
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
48 additions
and
0 deletions
+48
-0
tests/compile/test_aot_compile.py
tests/compile/test_aot_compile.py
+31
-0
vllm/compilation/compiler_interface.py
vllm/compilation/compiler_interface.py
+17
-0
No files found.
tests/compile/test_aot_compile.py
View file @
89f572db
...
...
@@ -441,6 +441,37 @@ def test_partition_wrapper_applied_on_aot_load(
)
@create_new_process_for_each_test("spawn")
def test_standalone_compile_correctness():
    """Check that VLLM_USE_STANDALONE_COMPILE does not change model outputs.

    The same model name and the same argument list are handed to
    ``compare_two_settings`` for both settings; the only difference between
    the two is the value of the ``VLLM_USE_STANDALONE_COMPILE`` environment
    variable ("1" versus "0").
    """
    import json

    from ..utils import compare_two_settings

    # One argument list is shared by both settings, so any difference in
    # behavior can only come from the environment variable below.
    cli_args = [
        "--dtype",
        "float16",
        "--max-model-len",
        "256",
        "--compilation_config",
        json.dumps({"mode": CompilationMode.VLLM_COMPILE}),
    ]
    standalone_on = {"VLLM_USE_STANDALONE_COMPILE": "1"}
    standalone_off = {"VLLM_USE_STANDALONE_COMPILE": "0"}

    compare_two_settings(
        "facebook/opt-125m",
        cli_args,
        cli_args,
        env1=standalone_on,
        env2=standalone_off,
    )
@
pytest
.
mark
.
skipif
(
not
is_torch_equal_or_newer
(
"2.10.0"
),
reason
=
"requires torch 2.10"
)
@
create_new_process_for_each_test
(
"spawn"
)
def
test_gpt2_cache_hit
(
monkeypatch
:
pytest
.
MonkeyPatch
):
...
...
vllm/compilation/compiler_interface.py
View file @
89f572db
...
...
@@ -632,6 +632,23 @@ class InductorAdaptor(CompilerInterface):
)
stack
.
enter_context
(
_patch_constrain_to_fx_strides
())
# Clear the tracing context before calling compile_fx.
# vLLM calls compile_fx from within a PiecewiseCompileInterpreter
# that runs under Dynamo's tracing context. The tracing context
# has a FakeTensorMode from Dynamo, but the example inputs for
# this subgraph have fake tensors from a different FakeTensorMode.
# compile_fx's _compile_fx_main calls detect_fake_mode() which
# asserts all FakeTensorModes match, causing a crash.
# Clearing the tracing context lets compile_fx create its own.
saved_tracing_context
=
torch
.
_guards
.
TracingContext
.
try_get
()
if
saved_tracing_context
is
not
None
:
torch
.
_guards
.
_TLS
.
tracing_context
=
None
def
_restore_tracing_context
():
torch
.
_guards
.
_TLS
.
tracing_context
=
saved_tracing_context
stack
.
callback
(
_restore_tracing_context
)
compiled_graph
=
compile_fx
(
graph
,
example_inputs
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment