Unverified Commit 79336380, authored by Angela Yi, committed by GitHub
Browse files

[misc] Remove is_torch_equal_or_newer(2.4) cases (#32296)


Signed-off-by: angelayi <yiangela7@gmail.com>
parent 6b176095
...@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType ...@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.sequence import IntermediateTensors from vllm.sequence import IntermediateTensors
from vllm.utils.import_utils import resolve_obj_by_qualname from vllm.utils.import_utils import resolve_obj_by_qualname
from vllm.utils.torch_utils import is_torch_equal_or_newer, supports_dynamo from vllm.utils.torch_utils import is_torch_equal_or_newer
from .monitor import start_monitoring_torch_compile from .monitor import start_monitoring_torch_compile
...@@ -312,7 +312,6 @@ def _support_torch_compile( ...@@ -312,7 +312,6 @@ def _support_torch_compile(
self.do_not_compile = ( self.do_not_compile = (
self.compilation_config.mode self.compilation_config.mode
in [CompilationMode.NONE, CompilationMode.STOCK_TORCH_COMPILE] in [CompilationMode.NONE, CompilationMode.STOCK_TORCH_COMPILE]
or not supports_dynamo()
or _should_ignore_torch_compile(self.__class__) or _should_ignore_torch_compile(self.__class__)
or not enable_compile or not enable_compile
) )
......
...@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method ...@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method
from vllm.utils.system_utils import suppress_stdout from vllm.utils.system_utils import suppress_stdout
from vllm.utils.torch_utils import ( from vllm.utils.torch_utils import (
direct_register_custom_op, direct_register_custom_op,
supports_custom_op,
) )
...@@ -246,33 +245,32 @@ def patched_fused_scaled_matmul_reduce_scatter( ...@@ -246,33 +245,32 @@ def patched_fused_scaled_matmul_reduce_scatter(
) )
if supports_custom_op(): direct_register_custom_op(
direct_register_custom_op(
op_name="all_reduce", op_name="all_reduce",
op_func=all_reduce, op_func=all_reduce,
fake_impl=all_reduce_fake, fake_impl=all_reduce_fake,
) )
direct_register_custom_op( direct_register_custom_op(
op_name="reduce_scatter", op_name="reduce_scatter",
op_func=reduce_scatter, op_func=reduce_scatter,
fake_impl=reduce_scatter_fake, fake_impl=reduce_scatter_fake,
) )
direct_register_custom_op( direct_register_custom_op(
op_name="all_gather", op_name="all_gather",
op_func=all_gather, op_func=all_gather,
fake_impl=all_gather_fake, fake_impl=all_gather_fake,
) )
# TODO: Remove this once the pytorch fix # TODO: Remove this once the pytorch fix
# (https://github.com/pytorch/pytorch/pull/165086) gets released, # (https://github.com/pytorch/pytorch/pull/165086) gets released,
# in either 2.9.1 or 2.10 # in either 2.9.1 or 2.10
direct_register_custom_op( direct_register_custom_op(
op_name="patched_fused_scaled_matmul_reduce_scatter", op_name="patched_fused_scaled_matmul_reduce_scatter",
op_func=patched_fused_scaled_matmul_reduce_scatter, op_func=patched_fused_scaled_matmul_reduce_scatter,
fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake, fake_impl=patched_fused_scaled_matmul_reduce_scatter_fake,
) )
class GroupCoordinator: class GroupCoordinator:
......
...@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool: ...@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool:
return Version(importlib.metadata.version("torch")) == Version(target) return Version(importlib.metadata.version("torch")) == Version(target)
# Using dynamo with vLLM doesn't really work well with PyTorch versions < 2.4.0.
# In particular, the FakeScalarType is not supported for earlier versions of
# PyTorch which breaks dynamo for any ops registered using ScalarType.
def supports_dynamo() -> bool:
return is_torch_equal_or_newer("2.4.0")
# Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform # Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
def supports_xccl() -> bool: def supports_xccl() -> bool:
return ( return (
...@@ -718,12 +711,6 @@ def supports_xccl() -> bool: ...@@ -718,12 +711,6 @@ def supports_xccl() -> bool:
) )
# Some backends use pytorch version < 2.4.0 which doesn't
# support `torch.library.custom_op`.
def supports_custom_op() -> bool:
return hasattr(torch.library, "custom_op")
# create a library to hold the custom op # create a library to hold the custom op
vllm_lib = Library("vllm", "FRAGMENT") # noqa vllm_lib = Library("vllm", "FRAGMENT") # noqa
...@@ -752,18 +739,6 @@ def direct_register_custom_op( ...@@ -752,18 +739,6 @@ def direct_register_custom_op(
library object. If you want to bind the operator to a different library, library object. If you want to bind the operator to a different library,
make sure the library object is alive when the operator is used. make sure the library object is alive when the operator is used.
""" """
if not supports_custom_op():
from vllm.platforms import current_platform
assert not current_platform.is_cuda_alike(), (
"cuda platform needs torch>=2.4 to support custom op, "
"chances are you are using an old version of pytorch "
"or a custom build of pytorch. It is recommended to "
"use vLLM in a fresh new environment and let it install "
"the required dependencies."
)
return
if mutates_args is None: if mutates_args is None:
mutates_args = [] mutates_args = []
......
...@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available ...@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available
from vllm.utils.torch_utils import ( from vllm.utils.torch_utils import (
get_dtype_size, get_dtype_size,
kv_cache_dtype_str_to_dtype, kv_cache_dtype_str_to_dtype,
supports_dynamo,
) )
from vllm.v1.attention.backend import ( from vllm.v1.attention.backend import (
AttentionBackend, AttentionBackend,
...@@ -3944,7 +3943,6 @@ class GPUModelRunner( ...@@ -3944,7 +3943,6 @@ class GPUModelRunner(
if ( if (
self.vllm_config.compilation_config.mode self.vllm_config.compilation_config.mode
== CompilationMode.STOCK_TORCH_COMPILE == CompilationMode.STOCK_TORCH_COMPILE
and supports_dynamo()
): ):
backend = self.vllm_config.compilation_config.init_backend(self.vllm_config) backend = self.vllm_config.compilation_config.init_backend(self.vllm_config)
compilation_counter.stock_torch_compile_count += 1 compilation_counter.stock_torch_compile_count += 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment