Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
aafabaa0
Unverified
Commit
aafabaa0
authored
Jun 27, 2025
by
Luka Govedič
Committed by
GitHub
Jun 27, 2025
Browse files
[Fix][torch.compile] Enable custom ops by default when Inductor off (#20102)
Signed-off-by:
luka
<
luka@neuralmagic.com
>
parent
94a55c76
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
41 additions
and
43 deletions
+41
-43
tests/model_executor/test_enabled_custom_ops.py
tests/model_executor/test_enabled_custom_ops.py
+26
-19
vllm/config.py
vllm/config.py
+9
-18
vllm/model_executor/custom_op.py
vllm/model_executor/custom_op.py
+6
-6
No files found.
tests/model_executor/test_enabled_custom_ops.py
View file @
aafabaa0
...
@@ -28,42 +28,49 @@ class Relu3(ReLUSquaredActivation):
...
@@ -28,42 +28,49 @@ class Relu3(ReLUSquaredActivation):
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"env, torch_level, ops_enabled, default_on"
,
"env, torch_level,
use_inductor,
ops_enabled, default_on"
,
[
[
# Default values based on compile level
# Default values based on compile level
(
""
,
0
,
[
True
]
*
4
,
True
),
# - All by default (no Inductor compilation)
(
""
,
1
,
[
True
]
*
4
,
True
),
(
""
,
0
,
False
,
[
True
]
*
4
,
True
),
(
""
,
2
,
[
True
]
*
4
,
True
),
# All by default
(
""
,
1
,
True
,
[
True
]
*
4
,
True
),
(
""
,
3
,
[
False
]
*
4
,
False
),
(
""
,
2
,
False
,
[
True
]
*
4
,
True
),
(
""
,
4
,
[
False
]
*
4
,
False
),
# None by default
# - None by default (with Inductor)
(
""
,
3
,
True
,
[
False
]
*
4
,
False
),
(
""
,
4
,
True
,
[
False
]
*
4
,
False
),
# - All by default (without Inductor)
(
""
,
3
,
False
,
[
True
]
*
4
,
True
),
(
""
,
4
,
False
,
[
True
]
*
4
,
True
),
# Explicitly enabling/disabling
# Explicitly enabling/disabling
#
#
# Default: all
# Default: all
#
#
# All but SiluAndMul
# All but SiluAndMul
(
"+rms_norm,-silu_and_mul"
,
0
,
[
1
,
0
,
1
,
1
],
True
),
(
"+rms_norm,-silu_and_mul"
,
0
,
True
,
[
1
,
0
,
1
,
1
],
True
),
# Only ReLU3
# Only ReLU3
(
"none,-rms_norm,+relu3"
,
0
,
[
0
,
0
,
0
,
1
],
False
),
(
"none,-rms_norm,+relu3"
,
1
,
False
,
[
0
,
0
,
0
,
1
],
False
),
# All but SiluAndMul
# All but SiluAndMul
(
"all,-silu_and_mul"
,
1
,
[
1
,
0
,
1
,
1
],
True
),
(
"all,-silu_and_mul"
,
2
,
True
,
[
1
,
0
,
1
,
1
],
True
),
# All but ReLU3 (even if ReLU2 is on)
# All but ReLU3 (even if ReLU2 is on)
(
"-relu3,relu2"
,
1
,
[
1
,
1
,
1
,
0
],
True
),
(
"-relu3,relu2"
,
3
,
False
,
[
1
,
1
,
1
,
0
],
True
),
#
GeluAndMul
and SiluAndMul
#
RMSNorm
and SiluAndMul
(
"none,-relu3,+
gelu_and_mul
,+silu_and_mul"
,
2
,
[
0
,
1
,
1
,
0
],
False
),
(
"none,-relu3,+
rms_norm
,+silu_and_mul"
,
4
,
False
,
[
1
,
1
,
0
,
0
],
False
),
# All but RMSNorm
# All but RMSNorm
(
"-rms_norm"
,
2
,
[
0
,
1
,
1
,
1
],
True
),
(
"-rms_norm"
,
3
,
False
,
[
0
,
1
,
1
,
1
],
True
),
#
#
# Default: none
# Default: none
#
#
# Only ReLU3
# Only ReLU3
(
"-silu_and_mul,+relu3"
,
3
,
[
0
,
0
,
0
,
1
],
False
),
(
"-silu_and_mul,+relu3"
,
3
,
True
,
[
0
,
0
,
0
,
1
],
False
),
# All but RMSNorm
# All but RMSNorm
(
"all,-rms_norm"
,
4
,
[
0
,
1
,
1
,
1
],
True
),
(
"all,-rms_norm"
,
4
,
True
,
[
0
,
1
,
1
,
1
],
True
),
])
])
def
test_enabled_ops
(
env
:
str
,
torch_level
:
int
,
ops_enabled
:
list
[
int
],
def
test_enabled_ops
(
env
:
str
,
torch_level
:
int
,
use_inductor
:
bool
,
default_on
:
bool
):
ops_enabled
:
list
[
int
],
default_on
:
bool
):
vllm_config
=
VllmConfig
(
compilation_config
=
CompilationConfig
(
vllm_config
=
VllmConfig
(
level
=
torch_level
,
custom_ops
=
env
.
split
(
","
)))
compilation_config
=
CompilationConfig
(
use_inductor
=
bool
(
use_inductor
),
level
=
torch_level
,
custom_ops
=
env
.
split
(
","
)))
with
set_current_vllm_config
(
vllm_config
):
with
set_current_vllm_config
(
vllm_config
):
assert
CustomOp
.
default_on
()
==
default_on
assert
CustomOp
.
default_on
()
==
default_on
...
...
vllm/config.py
View file @
aafabaa0
...
@@ -3994,7 +3994,8 @@ class CompilationConfig:
...
@@ -3994,7 +3994,8 @@ class CompilationConfig:
- 'none,+op1,+op2' to enable only op1 and op2
- 'none,+op1,+op2' to enable only op1 and op2
By default, all custom ops are enabled when running without Inductor and
By default, all custom ops are enabled when running without Inductor and
disabled when running with Inductor (compile_level >= Inductor)."""
disabled when running with Inductor: level>=PIECEWISE and use_inductor=True.
Inductor generates (fused) Triton kernels for disabled custom ops."""
splitting_ops
:
list
[
str
]
=
field
(
default_factory
=
list
)
splitting_ops
:
list
[
str
]
=
field
(
default_factory
=
list
)
"""A list of ops to split the full graph into subgraphs, used in piecewise
"""A list of ops to split the full graph into subgraphs, used in piecewise
compilation."""
compilation."""
...
@@ -4003,10 +4004,13 @@ class CompilationConfig:
...
@@ -4003,10 +4004,13 @@ class CompilationConfig:
use_inductor
:
bool
=
True
use_inductor
:
bool
=
True
"""Whether to use inductor compilation:
"""Whether to use inductor compilation:
- False: inductor compilation is not used. graph runs in eager.
- False: inductor compilation is not used. graph runs in eager
- True: inductor compilation is used. one graph for symbolic shape
(custom_ops enabled by default).
is compiled. In addition, compile for compile_sizes,
- True: inductor compilation is used (custom_ops disabled by default).
using configurations in inductor_compile_config."""
One graph for symbolic shape and one graph per size in compile_sizes
are compiled using configurations in inductor_compile_config.
This setting is ignored if level<PIECEWISE."""
compile_sizes
:
Optional
[
list
[
Union
[
int
,
str
]]]
=
None
compile_sizes
:
Optional
[
list
[
Union
[
int
,
str
]]]
=
None
"""Sizes to compile for inductor. In addition
"""Sizes to compile for inductor. In addition
to integers, it also supports "cudagraph_capture_sizes" to
to integers, it also supports "cudagraph_capture_sizes" to
...
@@ -4537,19 +4541,6 @@ class VllmConfig:
...
@@ -4537,19 +4541,6 @@ class VllmConfig:
self
.
compilation_config
.
level
=
CompilationLevel
.
PIECEWISE
self
.
compilation_config
.
level
=
CompilationLevel
.
PIECEWISE
self
.
compilation_config
.
set_splitting_ops_for_v1
()
self
.
compilation_config
.
set_splitting_ops_for_v1
()
# The behavior of custom ops with inductor depends on the config:
# - If use_inductor=True and custom_ops is empty:
# Inductor generates Triton kernels for all registered custom ops
# (default behavior)
# - If use_inductor=True and custom_ops is non-empty:
# Custom CUDA kernels are used for specified ops while inductor
# generates Triton kernels for remaining ops, including misc torch
# ops in the model.
if
(
not
self
.
compilation_config
.
custom_ops
and
self
.
compilation_config
.
use_inductor
):
# Let inductor generate Triton kernels for the custom ops.
self
.
compilation_config
.
custom_ops
=
[
"none"
]
self
.
_set_cudagraph_sizes
()
self
.
_set_cudagraph_sizes
()
if
self
.
cache_config
.
cpu_offload_gb
>
0
and
\
if
self
.
cache_config
.
cpu_offload_gb
>
0
and
\
...
...
vllm/model_executor/custom_op.py
View file @
aafabaa0
...
@@ -141,16 +141,16 @@ class CustomOp(nn.Module):
...
@@ -141,16 +141,16 @@ class CustomOp(nn.Module):
@
staticmethod
@
staticmethod
def
default_on
()
->
bool
:
def
default_on
()
->
bool
:
"""
"""
On by default if
level < CompilationLevel.PIECEWISE
On by default if
PyTorch Inductor is not used.
Specifying 'all' or 'none' in custom_op takes precedence.
Specifying 'all' or 'none' in custom_op takes precedence.
"""
"""
from
vllm.config
import
CompilationLevel
from
vllm.config
import
CompilationLevel
compilation_config
=
get_current_vllm_config
().
compilation_config
compilation_config
=
get_current_vllm_config
().
compilation_config
custom_ops
=
compilation_config
.
custom_ops
default_on
=
(
compilation_config
.
level
<
CompilationLevel
.
PIECEWISE
count_none
=
custom_ops
.
count
(
"none"
)
or
not
compilation_config
.
use_inductor
)
count_
all
=
custom_ops
.
count
(
"
all
"
)
count_
none
=
compilation_config
.
custom_ops
.
count
(
"
none
"
)
return
compilation_config
.
level
<
CompilationLevel
.
PIECEWISE
and
\
count_all
=
compilation_config
.
custom_ops
.
count
(
"all"
)
not
count_none
>
0
or
count_all
>
0
return
default_on
and
not
count_none
>
0
or
count_all
>
0
# Dictionary of all custom ops (classes, indexed by registered name).
# Dictionary of all custom ops (classes, indexed by registered name).
# To check if an op with a name is enabled, call .enabled() on the class.
# To check if an op with a name is enabled, call .enabled() on the class.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment