Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
07ea184f
Unverified
Commit
07ea184f
authored
Jan 28, 2026
by
Angela Yi
Committed by
GitHub
Jan 29, 2026
Browse files
[ez] Delete more torch version checks <= 2.8 (#33288)
Signed-off-by:
angelayi
<
yiangela7@gmail.com
>
parent
a663b218
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
70 deletions
+22
-70
vllm/compilation/compiler_interface.py
vllm/compilation/compiler_interface.py
+22
-66
vllm/model_executor/layers/quantization/kernels/scaled_mm/pytorch.py
...executor/layers/quantization/kernels/scaled_mm/pytorch.py
+0
-4
No files found.
vllm/compilation/compiler_interface.py
View file @
07ea184f
...
@@ -375,38 +375,7 @@ class InductorAdaptor(CompilerInterface):
...
@@ -375,38 +375,7 @@ class InductorAdaptor(CompilerInterface):
# it to get the hash of the compiled graph directly.
# it to get the hash of the compiled graph directly.
hash_str
,
file_path
=
None
,
None
hash_str
,
file_path
=
None
,
None
from
torch._inductor.codecache
import
FxGraphCache
,
compiled_fx_graph_hash
from
torch._inductor.codecache
import
compiled_fx_graph_hash
if
torch
.
__version__
.
startswith
(
"2.5"
):
original_load
=
FxGraphCache
.
load
original_load_name
=
"torch._inductor.codecache.FxGraphCache.load"
def
hijack_load
(
*
args
:
Any
,
**
kwargs
:
Any
)
->
Any
:
inductor_compiled_graph
=
original_load
(
*
args
,
**
kwargs
)
nonlocal
file_path
compiled_fn
=
inductor_compiled_graph
.
current_callable
file_path
=
compiled_fn
.
__code__
.
co_filename
# noqa
if
(
not
file_path
.
startswith
(
self
.
base_cache_dir
)
and
compiled_fn
.
__closure__
is
not
None
):
# hooked in the align_inputs_from_check_idxs function
# in torch/_inductor/utils.py
for
cell
in
compiled_fn
.
__closure__
:
if
not
callable
(
cell
.
cell_contents
):
continue
if
cell
.
cell_contents
.
__code__
.
co_filename
.
startswith
(
self
.
base_cache_dir
):
# this is the real file path compiled from Inductor
file_path
=
cell
.
cell_contents
.
__code__
.
co_filename
break
return
inductor_compiled_graph
hijacked_compile_fx_inner
=
torch
.
_inductor
.
compile_fx
.
compile_fx_inner
# noqa
elif
torch
.
__version__
>=
"2.6"
:
# function renamed in 2.6
original_load_name
=
None
def
hijacked_compile_fx_inner
(
*
args
:
Any
,
**
kwargs
:
Any
)
->
Any
:
def
hijacked_compile_fx_inner
(
*
args
:
Any
,
**
kwargs
:
Any
)
->
Any
:
output
=
torch
.
_inductor
.
compile_fx
.
compile_fx_inner
(
*
args
,
**
kwargs
)
output
=
torch
.
_inductor
.
compile_fx
.
compile_fx_inner
(
*
args
,
**
kwargs
)
...
@@ -453,10 +422,6 @@ class InductorAdaptor(CompilerInterface):
...
@@ -453,10 +422,6 @@ class InductorAdaptor(CompilerInterface):
return
AlwaysHitShapeEnv
()
return
AlwaysHitShapeEnv
()
with
ExitStack
()
as
stack
:
with
ExitStack
()
as
stack
:
# hijack to get the compiled graph itself
if
original_load_name
is
not
None
:
stack
.
enter_context
(
patch
(
original_load_name
,
hijack_load
))
# for hijacking the hash of the compiled graph
# for hijacking the hash of the compiled graph
stack
.
enter_context
(
stack
.
enter_context
(
patch
(
patch
(
...
@@ -573,15 +538,6 @@ class InductorAdaptor(CompilerInterface):
...
@@ -573,15 +538,6 @@ class InductorAdaptor(CompilerInterface):
# Dynamo metrics context, see method for more details.
# Dynamo metrics context, see method for more details.
exit_stack
.
enter_context
(
self
.
metrics_context
())
exit_stack
.
enter_context
(
self
.
metrics_context
())
if
torch
.
__version__
.
startswith
(
"2.5"
):
inductor_compiled_graph
=
FxGraphCache
.
_lookup_graph
(
hash_str
,
example_inputs
,
True
,
False
)
assert
inductor_compiled_graph
is
not
None
,
(
"Inductor cache lookup failed. Please remove "
f
"the cache directory and try again."
# noqa
)
elif
torch
.
__version__
>=
"2.6"
:
from
torch._inductor.output_code
import
CompiledFxGraphConstantsWithGm
from
torch._inductor.output_code
import
CompiledFxGraphConstantsWithGm
constants
=
CompiledFxGraphConstantsWithGm
(
graph
)
constants
=
CompiledFxGraphConstantsWithGm
(
graph
)
...
...
vllm/model_executor/layers/quantization/kernels/scaled_mm/pytorch.py
View file @
07ea184f
...
@@ -3,7 +3,6 @@
...
@@ -3,7 +3,6 @@
import
torch
import
torch
from
packaging
import
version
from
vllm.config
import
CompilationMode
,
get_current_vllm_config
from
vllm.config
import
CompilationMode
,
get_current_vllm_config
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
...
@@ -98,9 +97,6 @@ class RowWiseTorchFP8ScaledMMLinearKernel(TorchFP8ScaledMMLinearKernel):
...
@@ -98,9 +97,6 @@ class RowWiseTorchFP8ScaledMMLinearKernel(TorchFP8ScaledMMLinearKernel):
if
compute_capability
is
not
None
and
compute_capability
<
94
:
if
compute_capability
is
not
None
and
compute_capability
<
94
:
return
False
,
"requires compute capability 94 and above."
return
False
,
"requires compute capability 94 and above."
if
not
version
.
parse
(
torch
.
__version__
)
>=
version
.
parse
(
"2.7"
):
return
False
,
"requires pytorch version >=2.7."
return
True
,
None
return
True
,
None
@
classmethod
@
classmethod
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment