Use lazy graph module during split_module to defer recompile() (#37609)

Signed-off-by: angelayi <yiangela7@gmail.com>

Use lazy graph module during split_module to defer recompile() (#37609)
Signed-off-by: angelayi <yiangela7@gmail.com>
aceadb5e · Angela Yi · GitHub · ec228061 · aceadb5e
Unverified commit aceadb5e, authored Mar 23, 2026 by Angela Yi; committed by GitHub on Mar 23, 2026.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 8 additions and 3 deletions

vllm/compilation/backends.py vllm/compilation/backends.py +8 -3

No files found.
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -20,6 +20,7 @@ import torch
 import torch.fx as fx
 from torch._dynamo.utils import dynamo_timed
 from torch._logging._internal import trace_structured
+from torch.fx._lazy_graph_module import _use_lazy_graph_module

 import vllm.envs as envs
 from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig
@@ -573,9 +574,13 @@ def split_graph(
    # otherwise pytorch might reorder the nodes and
    # the semantics of the graph will change when we
    # have mutations in the graph
-    split_gm = torch.fx.passes.split_module.split_module(
-        graph, None, lambda node: node_to_subgraph_id[node], keep_original_order=True
-    )
+    with _use_lazy_graph_module(True):
+        split_gm = torch.fx.passes.split_module.split_module(
+            graph,
+            None,
+            lambda node: node_to_subgraph_id[node],
+            keep_original_order=True,
+        )

    outputs = []