Use standalone_compile by default in torch >= 2.8.0 (#18846)

Signed-off-by: rzou <zou3519@gmail.com>

Use standalone_compile by default in torch >= 2.8.0 (#18846)
Signed-off-by: rzou <zou3519@gmail.com>
a521ef06 · Richard Zou · GitHub · 64eaf5fe · a521ef06 · a521ef06
Unverified commit a521ef06, authored May 29, 2025 by Richard Zou; committed by GitHub on May 30, 2025.
Showing with 12 additions and 9 deletions

vllm/compilation/backends.py vllm/compilation/backends.py +3 -2

vllm/compilation/compiler_interface.py vllm/compilation/compiler_interface.py +1 -1

vllm/envs.py vllm/envs.py +8 -6

No files found.
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -16,7 +16,7 @@ import vllm.envs as envs
 from vllm.config import CompilationConfig, VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
 from .compiler_interface import (CompilerInterface, EagerAdaptor,
                                 InductorAdaptor, InductorStandaloneAdaptor)
@@ -29,7 +29,8 @@ logger = init_logger(__name__)
 def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
    if compilation_config.use_inductor:
-        if envs.VLLM_TEST_STANDALONE_COMPILE:
+        if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
+                "2.8.0"):
            logger.info("Using InductorStandaloneAdaptor")
            return InductorStandaloneAdaptor()
        else:

--- a/vllm/compilation/compiler_interface.py
+++ b/vllm/compilation/compiler_interface.py
@@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
    This is not on by default yet, but we plan to turn it on by default for
    PyTorch 2.8.
-    Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off.
+    Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
    """
    name = "inductor_standalone"

--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -308,9 +308,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
    lambda: bool(
        os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),
-    # Internal flag to enable/disable Inductor standalone compile
-    "VLLM_TEST_STANDALONE_COMPILE":
-    lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0",
+    # Feature flag to enable/disable Inductor standalone compile.
+    # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
+    # enabled by default.
+    "VLLM_USE_STANDALONE_COMPILE":
+    lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
    # local rank of the process in the distributed setting, used to determine
    # the GPU device id
@@ -892,7 +894,7 @@ def compute_hash() -> str:
        "VLLM_USE_TRITON_AWQ",
        "VLLM_DP_RANK",
        "VLLM_DP_SIZE",
-        "VLLM_TEST_STANDALONE_COMPILE",
+        "VLLM_USE_STANDALONE_COMPILE",
    ]
    for key in environment_variables_to_hash:
        if key in environment_variables: