"vscode:/vscode.git/clone" did not exist on "2888b4e5a7ec26b78bbbb6704277fc2cf58e9f6c"
Unverified commit 12fd17eb, authored by Angela Yi, committed by GitHub
Browse files

[compile] Initialize passes at VllmBackend init (#35216)


Signed-off-by: angelayi <yiangela7@gmail.com>
parent 37aadf62
......@@ -32,9 +32,9 @@ from vllm.platforms import current_platform
def test_compile_config_repr_succeeds():
# setup: VllmBackend mutates the config object
# Note: VllmBackend.__init__ already calls configure_post_pass()
config = VllmConfig()
backend = VllmBackend(config)
backend.configure_post_pass()
_ = VllmBackend(config)
# test that repr(config) succeeds
val = repr(config)
......
......@@ -836,8 +836,18 @@ class VllmBackend:
# in future we need PostGradPassManager.uuid() to be executed
# only at compile time.
self.inductor_config = deepcopy(self.compilation_config.inductor_compile_config)
# `torch.compile` is JIT compiled, so we don't need to
# do anything here
# Configure post-grad passes (including AllReduceFusionPass) during
# backend init rather than at torch.compile time, so that expensive
# one-time setup (e.g. FlashInfer workspace allocation) is not
# attributed to compilation latency.
start = time.time()
self.configure_post_pass()
logger.info_once(
"Post-grad pass configuration time: %.2f s",
time.time() - start,
scope="local",
)
def collect_standalone_compile_artifacts(
self,
......@@ -1118,7 +1128,6 @@ class VllmBackend:
assert not self._called, "VllmBackend can only be called once"
self.graph = graph
self.configure_post_pass()
if self.compilation_config.use_inductor_graph_partition:
# Let Inductor decide partitioning; avoid FX-level pre-splitting.
......
......@@ -380,6 +380,11 @@ def _support_torch_compile(
compilation_counter.num_models_seen += 1
self.compiled = False
# Skip if a parent class's @support_torch_compile already
# initialized the compile wrapper
if hasattr(self, "_compiled_callable"):
return
# Handled by monkeypatching `TorchCompileWithNoGuardsWrapper` into base class
TorchCompileWithNoGuardsWrapper.__init__(
self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment