[cold start] replace VLLM_COMPILE_DEPYF with debug_dump_dir (#20940)

Signed-off-by: Boyuan Feng <boyuan@meta.com>

[cold start] replace VLLM_COMPILE_DEPYF with debug_dump_dir (#20940)
Signed-off-by: Boyuan Feng <boyuan@meta.com>
91b3d190 · Boyuan Feng · GitHub · fc017915 · 91b3d190 · 91b3d190
Unverified Commit 91b3d190 authored Jul 14, 2025 by Boyuan Feng Committed by GitHub Jul 15, 2025
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 21 deletions

vllm/compilation/wrapper.py vllm/compilation/wrapper.py +7 -15

vllm/envs.py vllm/envs.py +0 -6

No files found.
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -93,27 +93,19 @@ class TorchCompileWrapperWithCustomDispatcher:
            return
        self.compiled_codes.append(new_code)
-        local_cache_dir = self.vllm_config.compilation_config.local_cache_dir
+        debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path
-        if isinstance(local_cache_dir, str):
+        if isinstance(debug_dump_dir, str) and debug_dump_dir != "":
-            decompiled_file_name = ("transformed_code.py"
+            rank = self.vllm_config.parallel_config.rank
-                                    if envs.VLLM_COMPILE_DEPYF else
+            decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}",
-                                    "transformed_code_README.txt")
+                                           "transformed_code.py")
-            decompiled_file = os.path.join(local_cache_dir,
-                                           decompiled_file_name)
            if not os.path.exists(decompiled_file):
                try:
                    # usually the decompilation will succeed for most models,
                    # as we guarantee a full-graph compilation in Dynamo.
                    # but there's no 100% guarantee, since decompilation is
                    # not a reversible process.
-                    if envs.VLLM_COMPILE_DEPYF:
                    import depyf
                    src = depyf.decompile(new_code)
-                    else:
-                        src = (
-                            "To get a transformed_code.py file, re-run with "
-                            "VLLM_COMPILE_DEPYF=1")
                    with open(decompiled_file, "w") as f:
                        f.write(src)

--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -97,7 +97,6 @@ if TYPE_CHECKING:
    VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
    VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
    VLLM_DISABLE_COMPILE_CACHE: bool = False
-    VLLM_COMPILE_DEPYF: bool = False
    Q_SCALE_CONSTANT: int = 200
    K_SCALE_CONSTANT: int = 200
    V_SCALE_CONSTANT: int = 100
@@ -742,11 +741,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_DISABLE_COMPILE_CACHE":
    lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
-    # If set, vllm will decompile the torch compiled code and dump to
-    # transformed_code.py. This is useful for debugging.
-    "VLLM_COMPILE_DEPYF":
-    lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
    # If set, vllm will run in development mode, which will enable
    # some additional endpoints for developing and debugging,
    # e.g. `/reset_prefix_cache`