Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
79336380
Unverified
Commit
79336380
authored
Jan 13, 2026
by
Angela Yi
Committed by
GitHub
Jan 13, 2026
Browse files
[misc] Remove is_torch_equal_or_newer(2.4) cases (#32296)
Signed-off-by:
angelayi
<
yiangela7@gmail.com
>
parent
6b176095
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
24 additions
and
54 deletions
+24
-54
vllm/compilation/decorators.py
vllm/compilation/decorators.py
+1
-2
vllm/distributed/parallel_state.py
vllm/distributed/parallel_state.py
+23
-25
vllm/utils/torch_utils.py
vllm/utils/torch_utils.py
+0
-25
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+0
-2
No files found.
vllm/compilation/decorators.py
View file @
79336380
...
@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType
...
@@ -28,7 +28,7 @@ from vllm.config.compilation import DynamicShapesType
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.utils.import_utils
import
resolve_obj_by_qualname
from
vllm.utils.import_utils
import
resolve_obj_by_qualname
from
vllm.utils.torch_utils
import
is_torch_equal_or_newer
,
supports_dynamo
from
vllm.utils.torch_utils
import
is_torch_equal_or_newer
from
.monitor
import
start_monitoring_torch_compile
from
.monitor
import
start_monitoring_torch_compile
...
@@ -312,7 +312,6 @@ def _support_torch_compile(
...
@@ -312,7 +312,6 @@ def _support_torch_compile(
self
.
do_not_compile
=
(
self
.
do_not_compile
=
(
self
.
compilation_config
.
mode
self
.
compilation_config
.
mode
in
[
CompilationMode
.
NONE
,
CompilationMode
.
STOCK_TORCH_COMPILE
]
in
[
CompilationMode
.
NONE
,
CompilationMode
.
STOCK_TORCH_COMPILE
]
or
not
supports_dynamo
()
or
_should_ignore_torch_compile
(
self
.
__class__
)
or
_should_ignore_torch_compile
(
self
.
__class__
)
or
not
enable_compile
or
not
enable_compile
)
)
...
...
vllm/distributed/parallel_state.py
View file @
79336380
...
@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method
...
@@ -53,7 +53,6 @@ from vllm.utils.network_utils import get_distributed_init_method
from
vllm.utils.system_utils
import
suppress_stdout
from
vllm.utils.system_utils
import
suppress_stdout
from
vllm.utils.torch_utils
import
(
from
vllm.utils.torch_utils
import
(
direct_register_custom_op
,
direct_register_custom_op
,
supports_custom_op
,
)
)
...
@@ -246,33 +245,32 @@ def patched_fused_scaled_matmul_reduce_scatter(
...
@@ -246,33 +245,32 @@ def patched_fused_scaled_matmul_reduce_scatter(
)
)
if
supports_custom_op
():
direct_register_custom_op
(
direct_register_custom_op
(
op_name
=
"all_reduce"
,
op_name
=
"all_reduce"
,
op_func
=
all_reduce
,
op_func
=
all_reduce
,
fake_impl
=
all_reduce_fake
,
fake_impl
=
all_reduce_fake
,
)
)
direct_register_custom_op
(
direct_register_custom_op
(
op_name
=
"reduce_scatter"
,
op_name
=
"reduce_scatter"
,
op_func
=
reduce_scatter
,
op_func
=
reduce_scatter
,
fake_impl
=
reduce_scatter_fake
,
fake_impl
=
reduce_scatter_fake
,
)
)
direct_register_custom_op
(
direct_register_custom_op
(
op_name
=
"all_gather"
,
op_name
=
"all_gather"
,
op_func
=
all_gather
,
op_func
=
all_gather
,
fake_impl
=
all_gather_fake
,
fake_impl
=
all_gather_fake
,
)
)
# TODO: Remove this once the pytorch fix
# TODO: Remove this once the pytorch fix
# (https://github.com/pytorch/pytorch/pull/165086) gets released,
# (https://github.com/pytorch/pytorch/pull/165086) gets released,
# in either 2.9.1 or 2.10
# in either 2.9.1 or 2.10
direct_register_custom_op
(
direct_register_custom_op
(
op_name
=
"patched_fused_scaled_matmul_reduce_scatter"
,
op_name
=
"patched_fused_scaled_matmul_reduce_scatter"
,
op_func
=
patched_fused_scaled_matmul_reduce_scatter
,
op_func
=
patched_fused_scaled_matmul_reduce_scatter
,
fake_impl
=
patched_fused_scaled_matmul_reduce_scatter_fake
,
fake_impl
=
patched_fused_scaled_matmul_reduce_scatter_fake
,
)
)
class
GroupCoordinator
:
class
GroupCoordinator
:
...
...
vllm/utils/torch_utils.py
View file @
79336380
...
@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool:
...
@@ -704,13 +704,6 @@ def is_torch_equal(target: str) -> bool:
return
Version
(
importlib
.
metadata
.
version
(
"torch"
))
==
Version
(
target
)
return
Version
(
importlib
.
metadata
.
version
(
"torch"
))
==
Version
(
target
)
# Using dynamo with vLLM doesn't really work well with PyTorch versions < 2.4.0.
# In particular, the FakeScalarType is not supported for earlier versions of
# PyTorch which breaks dynamo for any ops registered using ScalarType.
def
supports_dynamo
()
->
bool
:
return
is_torch_equal_or_newer
(
"2.4.0"
)
# Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
# Supports xccl with PyTorch versions >= 2.8.0.dev for XPU platform
def
supports_xccl
()
->
bool
:
def
supports_xccl
()
->
bool
:
return
(
return
(
...
@@ -718,12 +711,6 @@ def supports_xccl() -> bool:
...
@@ -718,12 +711,6 @@ def supports_xccl() -> bool:
)
)
# Some backends use pytorch version < 2.4.0 which doesn't
# support `torch.library.custom_op`.
def
supports_custom_op
()
->
bool
:
return
hasattr
(
torch
.
library
,
"custom_op"
)
# create a library to hold the custom op
# create a library to hold the custom op
vllm_lib
=
Library
(
"vllm"
,
"FRAGMENT"
)
# noqa
vllm_lib
=
Library
(
"vllm"
,
"FRAGMENT"
)
# noqa
...
@@ -752,18 +739,6 @@ def direct_register_custom_op(
...
@@ -752,18 +739,6 @@ def direct_register_custom_op(
library object. If you want to bind the operator to a different library,
library object. If you want to bind the operator to a different library,
make sure the library object is alive when the operator is used.
make sure the library object is alive when the operator is used.
"""
"""
if
not
supports_custom_op
():
from
vllm.platforms
import
current_platform
assert
not
current_platform
.
is_cuda_alike
(),
(
"cuda platform needs torch>=2.4 to support custom op, "
"chances are you are using an old version of pytorch "
"or a custom build of pytorch. It is recommended to "
"use vLLM in a fresh new environment and let it install "
"the required dependencies."
)
return
if
mutates_args
is
None
:
if
mutates_args
is
None
:
mutates_args
=
[]
mutates_args
=
[]
...
...
vllm/v1/worker/gpu_model_runner.py
View file @
79336380
...
@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available
...
@@ -96,7 +96,6 @@ from vllm.utils.platform_utils import is_pin_memory_available
from
vllm.utils.torch_utils
import
(
from
vllm.utils.torch_utils
import
(
get_dtype_size
,
get_dtype_size
,
kv_cache_dtype_str_to_dtype
,
kv_cache_dtype_str_to_dtype
,
supports_dynamo
,
)
)
from
vllm.v1.attention.backend
import
(
from
vllm.v1.attention.backend
import
(
AttentionBackend
,
AttentionBackend
,
...
@@ -3944,7 +3943,6 @@ class GPUModelRunner(
...
@@ -3944,7 +3943,6 @@ class GPUModelRunner(
if
(
if
(
self
.
vllm_config
.
compilation_config
.
mode
self
.
vllm_config
.
compilation_config
.
mode
==
CompilationMode
.
STOCK_TORCH_COMPILE
==
CompilationMode
.
STOCK_TORCH_COMPILE
and
supports_dynamo
()
):
):
backend
=
self
.
vllm_config
.
compilation_config
.
init_backend
(
self
.
vllm_config
)
backend
=
self
.
vllm_config
.
compilation_config
.
init_backend
(
self
.
vllm_config
)
compilation_counter
.
stock_torch_compile_count
+=
1
compilation_counter
.
stock_torch_compile_count
+=
1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment