Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fd57d2b5
Unverified
Commit
fd57d2b5
authored
Dec 08, 2024
by
youkaichao
Committed by
GitHub
Dec 08, 2024
Browse files
[torch.compile] allow candidate compile sizes (#10984)
Signed-off-by:
youkaichao
<
youkaichao@gmail.com
>
parent
7be15d93
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
28 additions
and
35 deletions
+28
-35
tests/engine/test_arg_utils.py
tests/engine/test_arg_utils.py
+4
-4
vllm/config.py
vllm/config.py
+22
-22
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+1
-4
vllm/entrypoints/llm.py
vllm/entrypoints/llm.py
+1
-5
No files found.
tests/engine/test_arg_utils.py
View file @
fd57d2b5
...
...
@@ -50,12 +50,12 @@ def test_compilation_config():
args
=
parser
.
parse_args
([
"-O=3"
])
assert
args
.
compilation_config
.
level
==
3
# set to
json
args
=
parser
.
parse_args
([
"--compilation-config"
,
'{"
level
"
: 3}
'
])
# set to
string form of a dict
args
=
parser
.
parse_args
([
"--compilation-config"
,
"{'
level
'
: 3}
"
])
assert
args
.
compilation_config
.
level
==
3
# set to
json
args
=
parser
.
parse_args
([
'
--compilation-config={
"
level
"
: 3}
'
])
# set to
string form of a dict
args
=
parser
.
parse_args
([
"
--compilation-config={
'
level
'
: 3}
"
])
assert
args
.
compilation_config
.
level
==
3
...
...
vllm/config.py
View file @
fd57d2b5
import
ast
import
copy
import
enum
import
hashlib
...
...
@@ -2191,14 +2192,10 @@ class CompilationConfig(BaseModel):
- use_inductor: whether to use inductor compilation.
- False: inductor compilation is not used. graph runs in eager.
- True: inductor compilation is used. one graph for symbolic shape
is compiled. In addition, compile for
different sizes specified
in
inductor
_compile_sizes, using configurations
is compiled. In addition, compile for
cudagraph sizes that are
in
candidate
_compile_sizes, using configurations
in inductor_compile_config.
- inductor_compile_sizes: sizes to compile for inductor.
- inductor_specialize_for_cudagraph_no_more_than: an optional integer
to specialize inductor for cudagraph sizes no more than the
specified size. It is useful when we want to specialize inductor
with a subset of cudagraph sizes.
- candidate_compile_sizes: sizes to compile for inductor.
- inductor_compile_config: additional configurations for inductor.
- None: use default configurations.
- inductor_passes: additional passes for inductor. It is a dictionary
...
...
@@ -2227,8 +2224,7 @@ class CompilationConfig(BaseModel):
])
use_inductor
:
bool
=
True
inductor_specialize_for_cudagraph_no_more_than
:
Optional
[
int
]
=
None
inductor_compile_sizes
:
Optional
[
List
[
int
]]
=
Field
(
default
=
None
)
candidate_compile_sizes
:
Optional
[
List
[
int
]]
=
Field
(
default
=
None
)
inductor_compile_config
:
Dict
=
Field
(
default_factory
=
dict
)
inductor_passes
:
Dict
[
str
,
str
]
=
Field
(
default_factory
=
dict
)
...
...
@@ -2294,7 +2290,9 @@ class CompilationConfig(BaseModel):
"""Parse the CLI value for the compilation config."""
if
cli_value
in
[
"0"
,
"1"
,
"2"
,
"3"
]:
return
cls
(
level
=
int
(
cli_value
))
return
CompilationConfig
.
model_validate_json
(
cli_value
)
# do not use `eval`, it is dangerous and can execute arbitrary code
dict_value
=
ast
.
literal_eval
(
cli_value
)
return
CompilationConfig
.
model_validate
(
dict_value
)
def
model_post_init
(
self
,
__context
:
Any
)
->
None
:
...
...
@@ -2355,18 +2353,20 @@ class CompilationConfig(BaseModel):
logger
.
info
((
"cudagraph sizes specified by model runner"
" %s is overridden by config %s"
),
sizes_to_specialize
,
self
.
cudagraph_capture_sizes
)
if
self
.
inductor_specialize_for_cudagraph_no_more_than
is
not
None
:
assert
self
.
inductor_compile_sizes
is
None
,
(
"inductor_compile_sizes should be None when "
"inductor_specialize_for_cudagraph_no_more_than is not None"
)
if
self
.
candidate_compile_sizes
is
None
:
self
.
candidate_compile_sizes
=
[]
self
.
compile_sizes
=
[
x
for
x
in
self
.
capture_sizes
if
x
<=
self
.
inductor_specialize_for_cudagraph_no_more_than
x
for
x
in
self
.
candidate_compile_sizes
if
x
in
self
.
capture_sizes
]
else
:
if
self
.
inductor_compile_sizes
is
None
:
self
.
inductor_compile_sizes
=
[]
self
.
compile_sizes
=
self
.
inductor_compile_sizes
ignored_sizes
=
[
x
for
x
in
self
.
candidate_compile_sizes
if
x
not
in
self
.
capture_sizes
]
if
ignored_sizes
:
logger
.
warning
((
"candidate_compile_sizes %s are ignored "
"because they are not cudagraph capture sizes."
),
ignored_sizes
)
# sort to make sure cudagraph capture sizes are in descending order
self
.
capture_sizes
.
sort
(
reverse
=
True
)
...
...
vllm/engine/arg_utils.py
View file @
fd57d2b5
...
...
@@ -209,12 +209,9 @@ class EngineArgs:
# support `EngineArgs(compilation_config={...})`
# without having to manually construct a
# CompilationConfig object
if
isinstance
(
self
.
compilation_config
,
(
int
)):
if
isinstance
(
self
.
compilation_config
,
(
int
,
dict
)):
self
.
compilation_config
=
CompilationConfig
.
from_cli
(
str
(
self
.
compilation_config
))
elif
isinstance
(
self
.
compilation_config
,
(
dict
)):
self
.
compilation_config
=
CompilationConfig
.
from_cli
(
json
.
dumps
(
self
.
compilation_config
))
# Setup plugins
from
vllm.plugins
import
load_general_plugins
...
...
vllm/entrypoints/llm.py
View file @
fd57d2b5
import
itertools
import
json
import
warnings
from
contextlib
import
contextmanager
from
typing
import
(
Any
,
ClassVar
,
Dict
,
List
,
Optional
,
Sequence
,
Tuple
,
Type
,
...
...
@@ -186,12 +185,9 @@ class LLM:
kwargs
[
"disable_log_stats"
]
=
True
if
compilation_config
is
not
None
:
if
isinstance
(
compilation_config
,
(
int
)):
if
isinstance
(
compilation_config
,
(
int
,
dict
)):
compilation_config_instance
=
CompilationConfig
.
from_cli
(
str
(
compilation_config
))
elif
isinstance
(
compilation_config
,
(
dict
)):
compilation_config_instance
=
CompilationConfig
.
from_cli
(
json
.
dumps
(
compilation_config
))
else
:
compilation_config_instance
=
compilation_config
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment