[misc] Remove is_torch_equal_or_newer(2.4) cases (#32296)

Signed-off-by: angelayi <yiangela7@gmail.com>

[misc] Remove is_torch_equal_or_newer(2.4) cases (#32296)
Signed-off-by: angelayi <yiangela7@gmail.com>
79336380 · Angela Yi · GitHub · 6b176095 · 79336380 · 79336380
Unverified commit 79336380, authored Jan 13, 2026 by Angela Yi; committed by GitHub on Jan 13, 2026.
4 changed files
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType
 from vllm.logger import init_logger
 from vllm.sequence import IntermediateTensors
 from vllm.utils.import_utils import resolve_obj_by_qualname
-from vllm.utils.torch_utils import is_torch_equal_or_newer, supports_dynamo
+from vllm.utils.torch_utils import is_torch_equal_or_newer

 from .monitor import start_monitoring_torch_compile

@@ -312,7 +312,6 @@ def _support_torch_compile(
        self.do_not_compile = (
            self.compilation_config.mode
            in [CompilationMode.NONE, CompilationMode.STOCK_TORCH_COMPILE]
-            or not supports_dynamo()
            or _should_ignore_torch_compile(self.__class__)
            or not enable_compile
        )

--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method
 from vllm.utils.system_utils import suppress_stdout
 from vllm.utils.torch_utils import (
    direct_register_custom_op,
-    supports_custom_op,
 )


@@ -246,33 +245,32 @@ def patched_fused_scaled_matmul_reduce_scatter(
    )


-if supports_custom_op():
-    direct_register_custom_op(
+direct_register_custom_op(
    op_name="all_reduce",
    op_func=all_reduce,
    fake_impl=all_reduce_fake,
-    )
+)

-    direct_register_custom_op(
+direct_register_custom_op(
    op_name="reduce_scatter",
    op_func=reduce_scatter,
    fake_impl=reduce_scatter_fake,
-    )
+)

-    direct_register_custom_op(
+direct_register_custom_op(
    op_name="all_gather",
    op_func=all_gather,
    fake_impl=all_gather_fake,
-    )
+)

-    # TODO: Remove this once the pytorch fix
-    # (https://github.com/pytorch/pytorch/pull/165086) gets released,
-    # in either 2.9.1 or 2.10
-    direct_register_custom_op(
+# TODO: Remove this once the pytorch fix
+# (https://github.com/pytorch/pytorch/pull/165086) gets released,
+# in either 2.9.1 or 2.10
+direct_register_custom_op(
    op_name="patched_fused_scaled_matmul_reduce_scatter",
    op_func=patched_fused_scaled_matmul_reduce_scatter,
    fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake,
-    )
+)


 class GroupCoordinator:

--- a/vllm/utils/torch_utils.py
+++ b/vllm/utils/torch_utils.py
@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool:
        return Version(importlib.metadata.version("torch")) == Version(target)


-# Using dynamo with vLLM doesn't really work well with PyTorch versions < 2.4.0.
-# In particular, the FakeScalarType is not supported for earlier versions of
-# PyTorch which breaks dynamo for any ops registered using ScalarType.
-def supports_dynamo() -> bool:
-    return is_torch_equal_or_newer("2.4.0")
-
-
 # Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
 def supports_xccl() -> bool:
    return (
@@ -718,12 +711,6 @@ def supports_xccl() -> bool:
    )


-# Some backends use pytorch version < 2.4.0 which doesn't
-# support `torch.library.custom_op`.
-def supports_custom_op() -> bool:
-    return hasattr(torch.library, "custom_op")
-
-
 # create a library to hold the custom op
 vllm_lib = Library("vllm", "FRAGMENT")  # noqa

@@ -752,18 +739,6 @@ def direct_register_custom_op(
    library object. If you want to bind the operator to a different library,
    make sure the library object is alive when the operator is used.
    """
-    if not supports_custom_op():
-        from vllm.platforms import current_platform
-
-        assert not current_platform.is_cuda_alike(), (
-            "cuda platform needs torch>=2.4 to support custom op, "
-            "chances are you are using an old version of pytorch "
-            "or a custom build of pytorch. It is recommended to "
-            "use vLLM in a fresh new environment and let it install "
-            "the required dependencies."
-        )
-        return
-
    if mutates_args is None:
        mutates_args = []


--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available
 from vllm.utils.torch_utils import (
    get_dtype_size,
    kv_cache_dtype_str_to_dtype,
-    supports_dynamo,
 )
 from vllm.v1.attention.backend import (
    AttentionBackend,
@@ -3944,7 +3943,6 @@ class GPUModelRunner(
        if (
            self.vllm_config.compilation_config.mode
            == CompilationMode.STOCK_TORCH_COMPILE
-            and supports_dynamo()
        ):
            backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
            compilation_counter.stock_torch_compile_count += 1