[ez] Delete more torch version checks <= 2.8 (#33288)

Signed-off-by: angelayi <yiangela7@gmail.com>

[ez] Delete more torch version checks <= 2.8 (#33288)
Signed-off-by: angelayi <yiangela7@gmail.com>
07ea184f · Angela Yi · GitHub · a663b218 · 07ea184f · 07ea184f
Unverified commit 07ea184f, authored Jan 28, 2026 by Angela Yi; committed by GitHub Jan 29, 2026
Showing 2 changed files with 22 additions and 70 deletions

vllm/compilation/compiler_interface.py +22 -66

vllm/model_executor/layers/quantization/kernels/scaled_mm/pytorch.py +0 -4

No files found.
--- a/vllm/compilation/compiler_interface.py
+++ b/vllm/compilation/compiler_interface.py
@@ -375,38 +375,7 @@ class InductorAdaptor(CompilerInterface):
        # it to get the hash of the compiled graph directly.
        hash_str, file_path = None, None
-        from torch._inductor.codecache import FxGraphCache, compiled_fx_graph_hash
+        from torch._inductor.codecache import compiled_fx_graph_hash
-        if torch.__version__.startswith("2.5"):
-            original_load = FxGraphCache.load
-            original_load_name = "torch._inductor.codecache.FxGraphCache.load"
-            def hijack_load(*args: Any, **kwargs: Any) -> Any:
-                inductor_compiled_graph = original_load(*args, **kwargs)
-                nonlocal file_path
-                compiled_fn = inductor_compiled_graph.current_callable
-                file_path = compiled_fn.__code__.co_filename  # noqa
-                if (
-                    not file_path.startswith(self.base_cache_dir)
-                    and compiled_fn.__closure__ is not None
-                ):
-                    # hooked in the align_inputs_from_check_idxs function
-                    # in torch/_inductor/utils.py
-                    for cell in compiled_fn.__closure__:
-                        if not callable(cell.cell_contents):
-                            continue
-                        if cell.cell_contents.__code__.co_filename.startswith(
-                            self.base_cache_dir
-                        ):
-                            # this is the real file path compiled from Inductor
-                            file_path = cell.cell_contents.__code__.co_filename
-                            break
-                return inductor_compiled_graph
-            hijacked_compile_fx_inner = torch._inductor.compile_fx.compile_fx_inner  # noqa
-        elif torch.__version__ >= "2.6":
-            # function renamed in 2.6
-            original_load_name = None
        def hijacked_compile_fx_inner(*args: Any, **kwargs: Any) -> Any:
            output = torch._inductor.compile_fx.compile_fx_inner(*args, **kwargs)
@@ -453,10 +422,6 @@ class InductorAdaptor(CompilerInterface):
            return AlwaysHitShapeEnv()
        with ExitStack() as stack:
-            # hijack to get the compiled graph itself
-            if original_load_name is not None:
-                stack.enter_context(patch(original_load_name, hijack_load))
            # for hijacking the hash of the compiled graph
            stack.enter_context(
                patch(
@@ -573,15 +538,6 @@ class InductorAdaptor(CompilerInterface):
            # Dynamo metrics context, see method for more details.
            exit_stack.enter_context(self.metrics_context())
-            if torch.__version__.startswith("2.5"):
-                inductor_compiled_graph = FxGraphCache._lookup_graph(
-                    hash_str, example_inputs, True, False
-                )
-                assert inductor_compiled_graph is not None, (
-                    "Inductor cache lookup failed. Please remove "
-                    f"the cache directory and try again."  # noqa
-                )
-            elif torch.__version__ >= "2.6":
            from torch._inductor.output_code import CompiledFxGraphConstantsWithGm
            constants = CompiledFxGraphConstantsWithGm(graph)

--- a/vllm/model_executor/layers/quantization/kernels/scaled_mm/pytorch.py
+++ b/vllm/model_executor/layers/quantization/kernels/scaled_mm/pytorch.py
@@ -3,7 +3,6 @@
 import torch
-from packaging import version
 from vllm.config import CompilationMode, get_current_vllm_config
 from vllm.platforms import current_platform
@@ -98,9 +97,6 @@ class RowWiseTorchFP8ScaledMMLinearKernel(TorchFP8ScaledMMLinearKernel):
        if compute_capability is not None and compute_capability < 94:
            return False, "requires compute capability 94 and above."
-        if not version.parse(torch.__version__) >= version.parse("2.7"):
-            return False, "requires pytorch version >=2.7."
        return True, None
    @classmethod