Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
baec0d4d
Unverified
Commit
baec0d4d
authored
Mar 21, 2025
by
Cyrus Leung
Committed by
GitHub
Mar 21, 2025
Browse files
Revert "[Feature] specify model in config.yaml (#14855)" (#15293)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
c21b99b9
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
30 additions
and
102 deletions
+30
-102
docs/source/serving/openai_compatible_server.md
docs/source/serving/openai_compatible_server.md
+1
-3
tests/config/test_config_with_model.yaml
tests/config/test_config_with_model.yaml
+0
-7
tests/conftest.py
tests/conftest.py
+0
-12
tests/data/test_config.yaml
tests/data/test_config.yaml
+0
-0
tests/test_utils.py
tests/test_utils.py
+11
-42
vllm/entrypoints/cli/serve.py
vllm/entrypoints/cli/serve.py
+9
-13
vllm/utils.py
vllm/utils.py
+9
-25
No files found.
docs/source/serving/openai_compatible_server.md
View file @
baec0d4d
...
@@ -184,7 +184,6 @@ For example:
...
@@ -184,7 +184,6 @@ For example:
```yaml
```yaml
# config.yaml
# config.yaml
model: meta-llama/Llama-3.1-8B-Instruct
host: "127.0.0.1"
host: "127.0.0.1"
port: 6379
port: 6379
uvicorn-log-level: "info"
uvicorn-log-level: "info"
...
@@ -193,13 +192,12 @@ uvicorn-log-level: "info"
...
@@ -193,13 +192,12 @@ uvicorn-log-level: "info"
To use the above config file:
To use the above config file:
```bash
```bash
vllm serve --config config.yaml
vllm serve
SOME_MODEL
--config config.yaml
```
```
:::{note}
:::{note}
In case an argument is supplied simultaneously using command line and the config file, the value from the command line will take precedence.
In case an argument is supplied simultaneously using command line and the config file, the value from the command line will take precedence.
The order of priorities is `
command line > config file values > defaults
`.
The order of priorities is `
command line > config file values > defaults
`.
e.g. `
vllm serve SOME_MODEL --config config.yaml
`, SOME_MODEL takes precedence over `
model
` in config file.
:::
:::
## API Reference
## API Reference
...
...
tests/config/test_config_with_model.yaml
deleted
100644 → 0
View file @
c21b99b9
# Same as test_config.yaml but with model specified
model
:
config-model
port
:
12312
served_model_name
:
mymodel
tensor_parallel_size
:
2
trust_remote_code
:
true
multi_step_stream_outputs
:
false
tests/conftest.py
View file @
baec0d4d
...
@@ -1121,15 +1121,3 @@ def pytest_collection_modifyitems(config, items):
...
@@ -1121,15 +1121,3 @@ def pytest_collection_modifyitems(config, items):
for
item
in
items
:
for
item
in
items
:
if
"optional"
in
item
.
keywords
:
if
"optional"
in
item
.
keywords
:
item
.
add_marker
(
skip_optional
)
item
.
add_marker
(
skip_optional
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
cli_config_file
():
"""Return the path to the CLI config file."""
return
os
.
path
.
join
(
_TEST_DIR
,
"config"
,
"test_config.yaml"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
cli_config_file_with_model
():
"""Return the path to the CLI config file with model."""
return
os
.
path
.
join
(
_TEST_DIR
,
"config"
,
"test_config_with_model.yaml"
)
tests/
config
/test_config.yaml
→
tests/
data
/test_config.yaml
View file @
baec0d4d
File moved
tests/test_utils.py
View file @
baec0d4d
...
@@ -8,7 +8,7 @@ from unittest.mock import patch
...
@@ -8,7 +8,7 @@ from unittest.mock import patch
import
pytest
import
pytest
import
torch
import
torch
from
vllm_test_utils
.monitor
import
monitor
from
vllm_test_utils
import
monitor
from
vllm.config
import
ParallelConfig
,
VllmConfig
,
set_current_vllm_config
from
vllm.config
import
ParallelConfig
,
VllmConfig
,
set_current_vllm_config
from
vllm.utils
import
(
FlexibleArgumentParser
,
MemorySnapshot
,
from
vllm.utils
import
(
FlexibleArgumentParser
,
MemorySnapshot
,
...
@@ -140,8 +140,7 @@ def parser():
...
@@ -140,8 +140,7 @@ def parser():
def
parser_with_config
():
def
parser_with_config
():
parser
=
FlexibleArgumentParser
()
parser
=
FlexibleArgumentParser
()
parser
.
add_argument
(
'serve'
)
parser
.
add_argument
(
'serve'
)
parser
.
add_argument
(
'model_tag'
,
nargs
=
'?'
)
parser
.
add_argument
(
'model_tag'
)
parser
.
add_argument
(
'--model'
,
type
=
str
)
parser
.
add_argument
(
'--served-model-name'
,
type
=
str
)
parser
.
add_argument
(
'--served-model-name'
,
type
=
str
)
parser
.
add_argument
(
'--config'
,
type
=
str
)
parser
.
add_argument
(
'--config'
,
type
=
str
)
parser
.
add_argument
(
'--port'
,
type
=
int
)
parser
.
add_argument
(
'--port'
,
type
=
int
)
...
@@ -197,29 +196,29 @@ def test_missing_required_argument(parser):
...
@@ -197,29 +196,29 @@ def test_missing_required_argument(parser):
parser
.
parse_args
([])
parser
.
parse_args
([])
def
test_cli_override_to_config
(
parser_with_config
,
cli_config_file
):
def
test_cli_override_to_config
(
parser_with_config
):
args
=
parser_with_config
.
parse_args
([
args
=
parser_with_config
.
parse_args
([
'serve'
,
'mymodel'
,
'--config'
,
cli
_config
_file
,
'serve'
,
'mymodel'
,
'--config'
,
'./data/test
_config
.yaml'
,
'--tensor-parallel-size'
,
'3'
'--tensor-parallel-size'
,
'3'
])
])
assert
args
.
tensor_parallel_size
==
3
assert
args
.
tensor_parallel_size
==
3
args
=
parser_with_config
.
parse_args
([
args
=
parser_with_config
.
parse_args
([
'serve'
,
'mymodel'
,
'--tensor-parallel-size'
,
'3'
,
'--config'
,
'serve'
,
'mymodel'
,
'--tensor-parallel-size'
,
'3'
,
'--config'
,
cli
_config
_file
'./data/test
_config
.yaml'
])
])
assert
args
.
tensor_parallel_size
==
3
assert
args
.
tensor_parallel_size
==
3
assert
args
.
port
==
12312
assert
args
.
port
==
12312
args
=
parser_with_config
.
parse_args
([
args
=
parser_with_config
.
parse_args
([
'serve'
,
'mymodel'
,
'--tensor-parallel-size'
,
'3'
,
'--config'
,
'serve'
,
'mymodel'
,
'--tensor-parallel-size'
,
'3'
,
'--config'
,
cli
_config
_file
,
'--port'
,
'666'
'./data/test
_config
.yaml'
,
'--port'
,
'666'
])
])
assert
args
.
tensor_parallel_size
==
3
assert
args
.
tensor_parallel_size
==
3
assert
args
.
port
==
666
assert
args
.
port
==
666
def
test_config_args
(
parser_with_config
,
cli_config_file
):
def
test_config_args
(
parser_with_config
):
args
=
parser_with_config
.
parse_args
(
args
=
parser_with_config
.
parse_args
(
[
'serve'
,
'mymodel'
,
'--config'
,
cli
_config
_file
])
[
'serve'
,
'mymodel'
,
'--config'
,
'./data/test
_config
.yaml'
])
assert
args
.
tensor_parallel_size
==
2
assert
args
.
tensor_parallel_size
==
2
assert
args
.
trust_remote_code
assert
args
.
trust_remote_code
assert
not
args
.
multi_step_stream_outputs
assert
not
args
.
multi_step_stream_outputs
...
@@ -241,9 +240,10 @@ def test_config_file(parser_with_config):
...
@@ -241,9 +240,10 @@ def test_config_file(parser_with_config):
])
])
def
test_no_model_tag
(
parser_with_config
,
cli_config_file
):
def
test_no_model_tag
(
parser_with_config
):
with
pytest
.
raises
(
ValueError
):
with
pytest
.
raises
(
ValueError
):
parser_with_config
.
parse_args
([
'serve'
,
'--config'
,
cli_config_file
])
parser_with_config
.
parse_args
(
[
'serve'
,
'--config'
,
'./data/test_config.yaml'
])
# yapf: enable
# yapf: enable
...
@@ -476,34 +476,3 @@ def test_swap_dict_values(obj, key1, key2):
...
@@ -476,34 +476,3 @@ def test_swap_dict_values(obj, key1, key2):
assert
obj
[
key1
]
==
original_obj
[
key2
]
assert
obj
[
key1
]
==
original_obj
[
key2
]
else
:
else
:
assert
key1
not
in
obj
assert
key1
not
in
obj
def
test_model_specification
(
parser_with_config
,
cli_config_file
,
cli_config_file_with_model
):
# Test model in CLI takes precedence over config
args
=
parser_with_config
.
parse_args
([
'serve'
,
'cli-model'
,
'--config'
,
cli_config_file_with_model
])
assert
args
.
model_tag
==
'cli-model'
assert
args
.
served_model_name
==
'mymodel'
# Test model from config file works
args
=
parser_with_config
.
parse_args
([
'serve'
,
'--config'
,
cli_config_file_with_model
])
assert
args
.
model
==
'config-model'
assert
args
.
served_model_name
==
'mymodel'
# Test no model specified anywhere raises error
with
pytest
.
raises
(
ValueError
,
match
=
"No model specified!"
):
parser_with_config
.
parse_args
([
'serve'
,
'--config'
,
cli_config_file
])
# Test other config values are preserved
args
=
parser_with_config
.
parse_args
([
'serve'
,
'cli-model'
,
'--config'
,
cli_config_file_with_model
])
assert
args
.
tensor_parallel_size
==
2
assert
args
.
trust_remote_code
is
True
assert
args
.
multi_step_stream_outputs
is
False
assert
args
.
port
==
12312
vllm/entrypoints/cli/serve.py
View file @
baec0d4d
...
@@ -21,16 +21,14 @@ class ServeSubcommand(CLISubcommand):
...
@@ -21,16 +21,14 @@ class ServeSubcommand(CLISubcommand):
@
staticmethod
@
staticmethod
def
cmd
(
args
:
argparse
.
Namespace
)
->
None
:
def
cmd
(
args
:
argparse
.
Namespace
)
->
None
:
# If model is specified in CLI (as positional arg), it takes precedence
# The default value of `--model`
if
hasattr
(
args
,
'model_tag'
)
and
args
.
model_tag
is
not
None
:
if
args
.
model
!=
EngineArgs
.
model
:
args
.
model
=
args
.
model_tag
# Otherwise use model from config (already in args.model)
# Check if we have a model specified somewhere
if
args
.
model
==
EngineArgs
.
model
:
# Still has default value
raise
ValueError
(
raise
ValueError
(
"With `vllm serve`, you should provide the model either as a "
"With `vllm serve`, you should provide the model as a "
"positional argument or in config file."
)
"positional argument instead of via the `--model` option."
)
# EngineArgs expects the model name to be passed as --model.
args
.
model
=
args
.
model_tag
uvloop
.
run
(
run_server
(
args
))
uvloop
.
run
(
run_server
(
args
))
...
@@ -43,12 +41,10 @@ class ServeSubcommand(CLISubcommand):
...
@@ -43,12 +41,10 @@ class ServeSubcommand(CLISubcommand):
serve_parser
=
subparsers
.
add_parser
(
serve_parser
=
subparsers
.
add_parser
(
"serve"
,
"serve"
,
help
=
"Start the vLLM OpenAI Compatible API server"
,
help
=
"Start the vLLM OpenAI Compatible API server"
,
usage
=
"vllm serve
[
model_tag
]
[options]"
)
usage
=
"vllm serve
<
model_tag
>
[options]"
)
serve_parser
.
add_argument
(
"model_tag"
,
serve_parser
.
add_argument
(
"model_tag"
,
type
=
str
,
type
=
str
,
nargs
=
'?'
,
help
=
"The model tag to serve"
)
help
=
"The model tag to serve "
"(optional if specified in config)"
)
serve_parser
.
add_argument
(
serve_parser
.
add_argument
(
"--config"
,
"--config"
,
type
=
str
,
type
=
str
,
...
...
vllm/utils.py
View file @
baec0d4d
...
@@ -1265,29 +1265,19 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
...
@@ -1265,29 +1265,19 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
config_args
=
self
.
_load_config_file
(
file_path
)
config_args
=
self
.
_load_config_file
(
file_path
)
# 0th index is for {serve,chat,complete}
# 0th index is for {serve,chat,complete}
#
optionally
followed by model_tag (only for serve)
# followed by model_tag (only for serve)
# followed by config args
# followed by config args
# followed by rest of cli args.
# followed by rest of cli args.
# maintaining this order will enforce the precedence
# maintaining this order will enforce the precedence
# of cli > config > defaults
# of cli > config > defaults
if
args
[
0
]
==
"serve"
:
if
args
[
0
]
==
"serve"
:
model_in_cli
=
len
(
args
)
>
1
and
not
args
[
1
].
startswith
(
'-'
)
if
index
==
1
:
model_in_config
=
any
(
arg
==
'--model'
for
arg
in
config_args
)
if
not
model_in_cli
and
not
model_in_config
:
raise
ValueError
(
raise
ValueError
(
"No model specified! Please specify model either in "
"No model_tag specified! Please check your command-line"
"command-line arguments or in config file."
)
" arguments."
)
args
=
[
args
[
0
]]
+
[
if
model_in_cli
:
args
[
1
]
# Model specified as positional arg, keep CLI version
]
+
config_args
+
args
[
2
:
index
]
+
args
[
index
+
2
:]
args
=
[
args
[
0
]]
+
[
args
[
1
]
]
+
config_args
+
args
[
2
:
index
]
+
args
[
index
+
2
:]
else
:
# No model in CLI, use config if available
args
=
[
args
[
0
]
]
+
config_args
+
args
[
1
:
index
]
+
args
[
index
+
2
:]
else
:
else
:
args
=
[
args
[
0
]]
+
config_args
+
args
[
1
:
index
]
+
args
[
index
+
2
:]
args
=
[
args
[
0
]]
+
config_args
+
args
[
1
:
index
]
+
args
[
index
+
2
:]
...
@@ -1305,7 +1295,9 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
...
@@ -1305,7 +1295,9 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
'--port': '12323',
'--port': '12323',
'--tensor-parallel-size': '4'
'--tensor-parallel-size': '4'
]
]
"""
"""
extension
:
str
=
file_path
.
split
(
'.'
)[
-
1
]
extension
:
str
=
file_path
.
split
(
'.'
)[
-
1
]
if
extension
not
in
(
'yaml'
,
'yml'
):
if
extension
not
in
(
'yaml'
,
'yml'
):
raise
ValueError
(
raise
ValueError
(
...
@@ -1330,15 +1322,7 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
...
@@ -1330,15 +1322,7 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
if
isinstance
(
action
,
StoreBoolean
)
if
isinstance
(
action
,
StoreBoolean
)
]
]
# Skip model from config if it's provided as positional argument
skip_model
=
(
hasattr
(
self
,
'_parsed_args'
)
and
self
.
_parsed_args
and
len
(
self
.
_parsed_args
)
>
1
and
self
.
_parsed_args
[
0
]
==
'serve'
and
not
self
.
_parsed_args
[
1
].
startswith
(
'-'
))
for
key
,
value
in
config
.
items
():
for
key
,
value
in
config
.
items
():
if
skip_model
and
key
==
'model'
:
continue
if
isinstance
(
value
,
bool
)
and
key
not
in
store_boolean_arguments
:
if
isinstance
(
value
,
bool
)
and
key
not
in
store_boolean_arguments
:
if
value
:
if
value
:
processed_args
.
append
(
'--'
+
key
)
processed_args
.
append
(
'--'
+
key
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment