[torch.compile] Cleanup compilation tests and custom passes, add debug utils,...

[torch.compile] Cleanup compilation tests and custom passes, add debug utils, fix DCE bug (#23091), fix test (#24376), and prep for custom op matching (#24604) (#24542) Signed-off-by: Luka Govedič <lgovedic@redhat.com> Signed-off-by: luka <lgovedic@redhat.com> Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>

[torch.compile] Cleanup compilation tests and custom passes, add debug utils,...
[torch.compile] Cleanup compilation tests and custom passes, add debug utils, fix DCE bug (#23091), fix test (#24376), and prep for custom op matching (#24604) (#24542) Signed-off-by: Luka Govedič <lgovedic@redhat.com> Signed-off-by: luka <lgovedic@redhat.com> Signed-off-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
d5e0fca2 · Luka Govedič · GitHub · 8d0ee5a5 · d5e0fca2 · d5e0fca2
Unverified Commit d5e0fca2 authored Sep 22, 2025 by Luka Govedič Committed by GitHub Sep 22, 2025
Showing with 69 additions and 7 deletions

vllm/config/__init__.py vllm/config/__init__.py +3 -4

vllm/config/compilation.py vllm/config/compilation.py +46 -2

vllm/envs.py vllm/envs.py +6 -0

vllm/utils/__init__.py vllm/utils/__init__.py +14 -1

No files found.
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -905,10 +905,9 @@ def set_current_vllm_config(vllm_config: VllmConfig,
    except Exception:
        raise
    else:
-        logger.debug("enabled custom ops: %s",
-                     vllm_config.compilation_config.enabled_custom_ops)
-        logger.debug("disabled custom ops: %s",
-                     vllm_config.compilation_config.disabled_custom_ops)
+        if check_compile:
+            vllm_config.compilation_config.custom_op_log_check()
+
        if check_compile and \
            vllm_config.compilation_config.level == CompilationLevel.PIECEWISE \
            and compilation_counter.num_models_seen == num_models_seen:

--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -487,6 +487,12 @@ class CompilationConfig:
                             "supported with torch>=2.9.0.dev. Set "
                             "use_inductor_graph_partition=False instead.")

+        for op in self.custom_ops:
+            if op[0] not in {'+', '-'} and op not in {'all', 'none'}:
+                raise ValueError(f"Invalid syntax '{op}' for custom op, "
+                                 "must be 'all', 'none', '+op' or '-op' "
+                                 "(where 'op' is the registered op name)")
+
    def init_backend(self, vllm_config: "VllmConfig") -> Union[str, Callable]:
        if self.level == CompilationLevel.NO_COMPILATION:
            raise ValueError("No compilation level is set.")
@@ -532,8 +538,8 @@ class CompilationConfig:
            for x in self.compile_sizes:
                if isinstance(x, str):
                    assert x == "cudagraph_capture_sizes", \
-                    "Unrecognized size type in compile_sizes, " \
-                    f"expect 'cudagraph_capture_sizes', got {x}"
+                        "Unrecognized size type in compile_sizes, " \
+                        f"expect 'cudagraph_capture_sizes', got {x}"
                    computed_compile_sizes.extend(self.cudagraph_capture_sizes)
                else:
                    assert isinstance(x, int)
@@ -628,3 +634,41 @@ class CompilationConfig:

        return use_fx_graph_piecewise_compilation or \
            use_inductor_piecewise_compilation
+
+    def custom_op_log_check(self):
+        """
+        Log the enabled/disabled custom ops and warn about every entry of
+        the custom_ops field that refers to an op not used by the model.
+
+        It is called at the end of set_current_vllm_config,
+        after the custom ops have been instantiated.
+        """
+        enabled = self.enabled_custom_ops
+        disabled = self.disabled_custom_ops
+
+        # Nothing was recorded as enabled or disabled: nothing to verify.
+        if len(enabled) + len(disabled) == 0:
+            logger.debug("No custom ops found in model.")
+            return
+
+        logger.debug("enabled custom ops: %s", enabled)
+        logger.debug("disabled custom ops: %s", disabled)
+
+        ops_in_model = (enabled | disabled)
+        for spec in self.custom_ops:
+            # The blanket switches do not name a specific op.
+            if spec in {"all", "none"}:
+                continue
+
+            sign = spec[0]
+            assert sign in {'+', '-'}, "Invalid custom op syntax " \
+                                       "(should be checked during init)"
+
+            # Strip the leading '+'/'-' to get the op name itself.
+            name = spec[1:]
+            if name in ops_in_model:
+                continue
+
+            from vllm.model_executor.custom_op import CustomOp
+
+            # Distinguish an op that is unknown to the registry from one
+            # that is registered but simply unused by this model.
+            # Note: Only imported op classes appear in the registry.
+            if name in CustomOp.op_registry:
+                reason = "not present in model"
+            else:
+                reason = "doesn't exist (or wasn't imported/registered)"
+
+            if sign == '+':
+                action = "enabling"
+            else:
+                action = "disabling"
+
+            logger.warning_once("Op '%s' %s, %s with '%s' has no effect",
+                                name, reason, action, spec)
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -190,6 +190,7 @@ if TYPE_CHECKING:
    VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
    VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True
    VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER"
+    VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None


 def get_default_cache_root():
@@ -442,6 +443,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_USE_STANDALONE_COMPILE":
    lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "0") == "1",

+    # Debug pattern matching inside custom passes.
+    # Should be set to the fx.Node name (e.g. 'getitem_34' or 'scaled_mm_3').
+    "VLLM_PATTERN_MATCH_DEBUG":
+    lambda: os.environ.get("VLLM_PATTERN_MATCH_DEBUG", None),
+
    # local rank of the process in the distributed setting, used to determine
    # the GPU device id
    "LOCAL_RANK":

--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -3392,7 +3392,7 @@ def length_from_prompt_token_ids_or_embeds(
    prompt_token_ids: Optional[list[int]],
    prompt_embeds: Optional[torch.Tensor],
 ) -> int:
-    """Calculate the request length (in number of tokens) give either 
+    """Calculate the request length (in number of tokens) given either
    prompt_token_ids or prompt_embeds.
    """
    prompt_token_len = None if prompt_token_ids is None else len(
@@ -3413,3 +3413,16 @@ def length_from_prompt_token_ids_or_embeds(
                f" prompt_token_ids={prompt_token_len}"
                f" prompt_embeds={prompt_embeds_len}")
        return prompt_token_len
+
+
+@contextlib.contextmanager
+def set_env_var(key: str, value: str):
+    """Temporarily set an environment variable for the duration of a block.
+
+    On exit (normal or via an exception) the variable is restored to the
+    value it had on entry, or removed if it was not set on entry.
+
+    Args:
+        key: Name of the environment variable.
+        value: Value assigned to the variable while the block runs.
+    """
+    old = os.environ.get(key)
+    os.environ[key] = value
+    try:
+        yield
+    finally:
+        if old is None:
+            # pop() rather than del: the managed block may have removed the
+            # variable itself, and a KeyError raised here would mask any
+            # exception propagating out of that block.
+            os.environ.pop(key, None)
+        else:
+            os.environ[key] = old