Unverified commit a521ef06, authored by Richard Zou, committed by GitHub
Browse files

Use standalone_compile by default in torch >= 2.8.0 (#18846)


Signed-off-by: rzou <zou3519@gmail.com>
parent 64eaf5fe
...@@ -16,7 +16,7 @@ import vllm.envs as envs ...@@ -16,7 +16,7 @@ import vllm.envs as envs
from vllm.config import CompilationConfig, VllmConfig from vllm.config import CompilationConfig, VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import resolve_obj_by_qualname from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
from .compiler_interface import (CompilerInterface, EagerAdaptor, from .compiler_interface import (CompilerInterface, EagerAdaptor,
InductorAdaptor, InductorStandaloneAdaptor) InductorAdaptor, InductorStandaloneAdaptor)
...@@ -29,7 +29,8 @@ logger = init_logger(__name__) ...@@ -29,7 +29,8 @@ logger = init_logger(__name__)
def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface: def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
if compilation_config.use_inductor: if compilation_config.use_inductor:
if envs.VLLM_TEST_STANDALONE_COMPILE: if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
"2.8.0"):
logger.info("Using InductorStandaloneAdaptor") logger.info("Using InductorStandaloneAdaptor")
return InductorStandaloneAdaptor() return InductorStandaloneAdaptor()
else: else:
......
...@@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface): ...@@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
This is not on by default yet, but we plan to turn it on by default for This is not on by default yet, but we plan to turn it on by default for
PyTorch 2.8. PyTorch 2.8.
Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off. Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
""" """
name = "inductor_standalone" name = "inductor_standalone"
......
...@@ -308,9 +308,11 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -308,9 +308,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
lambda: bool( lambda: bool(
os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"), os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),
# Internal flag to enable/disable Inductor standalone compile # Feature flag to enable/disable Inductor standalone compile.
"VLLM_TEST_STANDALONE_COMPILE": # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0", # enabled by default.
"VLLM_USE_STANDALONE_COMPILE":
lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
# local rank of the process in the distributed setting, used to determine # local rank of the process in the distributed setting, used to determine
# the GPU device id # the GPU device id
...@@ -892,7 +894,7 @@ def compute_hash() -> str: ...@@ -892,7 +894,7 @@ def compute_hash() -> str:
"VLLM_USE_TRITON_AWQ", "VLLM_USE_TRITON_AWQ",
"VLLM_DP_RANK", "VLLM_DP_RANK",
"VLLM_DP_SIZE", "VLLM_DP_SIZE",
"VLLM_TEST_STANDALONE_COMPILE", "VLLM_USE_STANDALONE_COMPILE",
] ]
for key in environment_variables_to_hash: for key in environment_variables_to_hash:
if key in environment_variables: if key in environment_variables:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment