Unverified commit 91b3d190, authored by Boyuan Feng, committed by GitHub
Browse files

[cold start] replace VLLM_COMPILE_DEPYF with debug_dump_dir (#20940)


Signed-off-by: Boyuan Feng <boyuan@meta.com>
parent fc017915
...@@ -93,27 +93,19 @@ class TorchCompileWrapperWithCustomDispatcher: ...@@ -93,27 +93,19 @@ class TorchCompileWrapperWithCustomDispatcher:
return return
self.compiled_codes.append(new_code) self.compiled_codes.append(new_code)
local_cache_dir = self.vllm_config.compilation_config.local_cache_dir debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path
if isinstance(local_cache_dir, str): if isinstance(debug_dump_dir, str) and debug_dump_dir != "":
decompiled_file_name = ("transformed_code.py" rank = self.vllm_config.parallel_config.rank
if envs.VLLM_COMPILE_DEPYF else decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}",
"transformed_code_README.txt") "transformed_code.py")
decompiled_file = os.path.join(local_cache_dir,
decompiled_file_name)
if not os.path.exists(decompiled_file): if not os.path.exists(decompiled_file):
try: try:
# usually the decompilation will succeed for most models, # usually the decompilation will succeed for most models,
# as we guarantee a full-graph compilation in Dynamo. # as we guarantee a full-graph compilation in Dynamo.
# but there's no 100% guarantee, since decompilation is # but there's no 100% guarantee, since decompilation is
# not a reversible process. # not a reversible process.
if envs.VLLM_COMPILE_DEPYF:
import depyf import depyf
src = depyf.decompile(new_code) src = depyf.decompile(new_code)
else:
src = (
"To get a transformed_code.py file, re-run with "
"VLLM_COMPILE_DEPYF=1")
with open(decompiled_file, "w") as f: with open(decompiled_file, "w") as f:
f.write(src) f.write(src)
......
...@@ -97,7 +97,6 @@ if TYPE_CHECKING: ...@@ -97,7 +97,6 @@ if TYPE_CHECKING:
VLLM_ENABLE_V1_MULTIPROCESSING: bool = True VLLM_ENABLE_V1_MULTIPROCESSING: bool = True
VLLM_LOG_BATCHSIZE_INTERVAL: float = -1 VLLM_LOG_BATCHSIZE_INTERVAL: float = -1
VLLM_DISABLE_COMPILE_CACHE: bool = False VLLM_DISABLE_COMPILE_CACHE: bool = False
VLLM_COMPILE_DEPYF: bool = False
Q_SCALE_CONSTANT: int = 200 Q_SCALE_CONSTANT: int = 200
K_SCALE_CONSTANT: int = 200 K_SCALE_CONSTANT: int = 200
V_SCALE_CONSTANT: int = 100 V_SCALE_CONSTANT: int = 100
...@@ -742,11 +741,6 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -742,11 +741,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_DISABLE_COMPILE_CACHE": "VLLM_DISABLE_COMPILE_CACHE":
lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))), lambda: bool(int(os.getenv("VLLM_DISABLE_COMPILE_CACHE", "0"))),
# If set, vllm will decompile the torch compiled code and dump to
# transformed_code.py. This is useful for debugging.
"VLLM_COMPILE_DEPYF":
lambda: bool(int(os.getenv("VLLM_COMPILE_DEPYF", "0"))),
# If set, vllm will run in development mode, which will enable # If set, vllm will run in development mode, which will enable
# some additional endpoints for developing and debugging, # some additional endpoints for developing and debugging,
# e.g. `/reset_prefix_cache` # e.g. `/reset_prefix_cache`
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment