Unverified commit 424033f4, authored by Richard Zou, committed by GitHub
Browse files

[Bugfix] Include inductor and functorch configs in compilation cache key (#40627)


Signed-off-by: Richard Zou <zou3519@gmail.com>
parent da1e7311
...@@ -56,6 +56,7 @@ def _cold_start(vllm_runner): ...@@ -56,6 +56,7 @@ def _cold_start(vllm_runner):
def test_moe_startup(monkeypatch, vllm_runner, fresh_vllm_cache, mega_aot_artifact): def test_moe_startup(monkeypatch, vllm_runner, fresh_vllm_cache, mega_aot_artifact):
monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0") monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
monkeypatch.setenv("VLLM_USE_MEGA_AOT_ARTIFACT", mega_aot_artifact) monkeypatch.setenv("VLLM_USE_MEGA_AOT_ARTIFACT", mega_aot_artifact)
monkeypatch.setenv("VLLM_DEEP_GEMM_WARMUP", "skip")
# Cold start in a forked child (must fork before CUDA init). # Cold start in a forked child (must fork before CUDA init).
# This model has 32 identical transformer layers which produce # This model has 32 identical transformer layers which produce
...@@ -235,6 +236,7 @@ def _cold_start_model(vllm_runner, spec: ModelStartupSpec): ...@@ -235,6 +236,7 @@ def _cold_start_model(vllm_runner, spec: ModelStartupSpec):
@fork_new_process_for_each_test @fork_new_process_for_each_test
def test_model_startup(monkeypatch, vllm_runner, fresh_vllm_cache, spec): def test_model_startup(monkeypatch, vllm_runner, fresh_vllm_cache, spec):
monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0") monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
monkeypatch.setenv("VLLM_DEEP_GEMM_WARMUP", "skip")
# Cold start in a forked child (must fork before CUDA init). # Cold start in a forked child (must fork before CUDA init).
ctx = mp.get_context("fork") ctx = mp.get_context("fork")
......
...@@ -617,6 +617,24 @@ def test_inductor_asserts_enabled_in_debug(monkeypatch): ...@@ -617,6 +617,24 @@ def test_inductor_asserts_enabled_in_debug(monkeypatch):
assert config.inductor_compile_config.get("scalar_asserts") is True assert config.inductor_compile_config.get("scalar_asserts") is True
def test_get_inductor_factors_includes_configs():
    """Cache-key factors must react to inductor and functorch config changes.

    Takes the factors under the current configuration as a reference, then
    flips one boolean option in each config module in turn and checks that
    the factors computed while the option is flipped differ from the
    reference.
    """
    from torch._functorch import config as functorch_config
    from torch._inductor import config as inductor_config

    from vllm.compilation.compiler_interface import get_inductor_factors

    reference_factors = get_inductor_factors()

    # Flip an inductor option; the factors must no longer match.
    flipped_max_autotune = not inductor_config.max_autotune
    with inductor_config.patch("max_autotune", flipped_max_autotune):
        inductor_variant = get_inductor_factors()
    assert reference_factors != inductor_variant, (
        "inductor config change was not reflected"
    )

    # Same check for a functorch option.
    flipped_donated_buffer = not functorch_config.donated_buffer
    with functorch_config.patch("donated_buffer", flipped_donated_buffer):
        functorch_variant = get_inductor_factors()
    assert reference_factors != functorch_variant, (
        "functorch config change was not reflected"
    )
def test_inductor_asserts_user_override(monkeypatch): def test_inductor_asserts_user_override(monkeypatch):
"""Test that explicit inductor_compile_config overrides the """Test that explicit inductor_compile_config overrides the
debug-logging default.""" debug-logging default."""
......
...@@ -152,6 +152,17 @@ class AlwaysHitShapeEnv: ...@@ -152,6 +152,17 @@ class AlwaysHitShapeEnv:
return "" return ""
def _get_vllm_functorch_config() -> dict[str, Any]:
    """Build the functorch config overrides that vLLM applies at compile time.

    Both set_functorch_config() and get_inductor_factors() consume this
    mapping, so the configuration used for compilation and the one folded
    into the cache key come from a single source.

    Returns:
        An empty dict when ``envs.VLLM_USE_MEGA_AOT_ARTIFACT`` is truthy,
        otherwise ``{"bundled_autograd_cache": False}``.
    """
    if envs.VLLM_USE_MEGA_AOT_ARTIFACT:
        return {}
    return {"bundled_autograd_cache": False}
def get_inductor_factors() -> list[Any]: def get_inductor_factors() -> list[Any]:
factors: list[Any] = [] factors: list[Any] = []
# summarize system state # summarize system state
...@@ -165,6 +176,13 @@ def get_inductor_factors() -> list[Any]: ...@@ -165,6 +176,13 @@ def get_inductor_factors() -> list[Any]:
torch_factors = torch_key() torch_factors = torch_key()
factors.append(torch_factors) factors.append(torch_factors)
from torch._functorch import config as functorch_config
from torch._inductor import config as inductor_config
factors.append(inductor_config.save_config_portable())
with functorch_config.patch(_get_vllm_functorch_config()):
factors.append(functorch_config.save_config_portable())
return factors return factors
...@@ -739,8 +757,8 @@ def set_inductor_config(config: dict[str, Any], compile_range: Range) -> None: ...@@ -739,8 +757,8 @@ def set_inductor_config(config: dict[str, Any], compile_range: Range) -> None:
def set_functorch_config() -> None: def set_functorch_config() -> None:
if not envs.VLLM_USE_MEGA_AOT_ARTIFACT: for k, v in _get_vllm_functorch_config().items():
torch._functorch.config.bundled_autograd_cache = False setattr(torch._functorch.config, k, v)
class EagerAdaptor(CompilerInterface): class EagerAdaptor(CompilerInterface):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment