Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
91b3d190
Unverified
Commit
91b3d190
authored
Jul 14, 2025
by
Boyuan Feng
Committed by
GitHub
Jul 15, 2025
Browse files
[cold start] replace VLLM_COMPILE_DEPYF with debug_dump_dir (#20940)
Signed-off-by:
Boyuan Feng
<
boyuan@meta.com
>
parent
fc017915
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
21 deletions
+7
-21
vllm/compilation/wrapper.py
vllm/compilation/wrapper.py
+7
-15
vllm/envs.py
vllm/envs.py
+0
-6
No files found.
vllm/compilation/wrapper.py
View file @
91b3d190
...
...
@@ -93,27 +93,19 @@ class TorchCompileWrapperWithCustomDispatcher:
return
self
.
compiled_codes
.
append
(
new_code
)
local_cache_dir
=
self
.
vllm_config
.
compilation_config
.
local_cache_dir
if
isinstance
(
local_cache_dir
,
str
):
decompiled_file_name
=
(
"transformed_code.py"
if
envs
.
VLLM_COMPILE_DEPYF
else
"transformed_code_README.txt"
)
decompiled_file
=
os
.
path
.
join
(
local_cache_dir
,
decompiled_file_name
)
debug_dump_dir
=
self
.
vllm_config
.
compilation_config
.
debug_dump_path
if
isinstance
(
debug_dump_dir
,
str
)
and
debug_dump_dir
!=
""
:
rank
=
self
.
vllm_config
.
parallel_config
.
rank
decompiled_file
=
os
.
path
.
join
(
debug_dump_dir
,
f
"rank_
{
rank
}
"
,
"transformed_code.py"
)
if
not
os
.
path
.
exists
(
decompiled_file
):
try
:
# Usually the decompilation will succeed for most models,
# as we guarantee a full-graph compilation in Dynamo,
# but there's no 100% guarantee, since decompilation is
# not a reversible process.
if
envs
.
VLLM_COMPILE_DEPYF
:
import
depyf
src
=
depyf
.
decompile
(
new_code
)
else
:
src
=
(
"To get a transformed_code.py file, re-run with "
"VLLM_COMPILE_DEPYF=1"
)
with
open
(
decompiled_file
,
"w"
)
as
f
:
f
.
write
(
src
)
...
...
vllm/envs.py
View file @
91b3d190
...
...
@@ -97,7 +97,6 @@ if TYPE_CHECKING:
VLLM_ENABLE_V1_MULTIPROCESSING
:
bool
=
True
VLLM_LOG_BATCHSIZE_INTERVAL
:
float
=
-
1
VLLM_DISABLE_COMPILE_CACHE
:
bool
=
False
VLLM_COMPILE_DEPYF
:
bool
=
False
Q_SCALE_CONSTANT
:
int
=
200
K_SCALE_CONSTANT
:
int
=
200
V_SCALE_CONSTANT
:
int
=
100
...
...
@@ -742,11 +741,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_DISABLE_COMPILE_CACHE"
:
lambda
:
bool
(
int
(
os
.
getenv
(
"VLLM_DISABLE_COMPILE_CACHE"
,
"0"
))),
# If set, vllm will decompile the torch compiled code and dump to
# transformed_code.py. This is useful for debugging.
"VLLM_COMPILE_DEPYF"
:
lambda
:
bool
(
int
(
os
.
getenv
(
"VLLM_COMPILE_DEPYF"
,
"0"
))),
# If set, vllm will run in development mode, which will enable
# some additional endpoints for developing and debugging,
# e.g. `/reset_prefix_cache`
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment