Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
baec0d4d
Unverified
Commit
baec0d4d
authored
Mar 21, 2025
by
Cyrus Leung
Committed by
GitHub
Mar 21, 2025
Browse files
Revert "[Feature] specify model in config.yaml (#14855)" (#15293)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
c21b99b9
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
30 additions
and
102 deletions
+30
-102
docs/source/serving/openai_compatible_server.md
docs/source/serving/openai_compatible_server.md
+1
-3
tests/config/test_config_with_model.yaml
tests/config/test_config_with_model.yaml
+0
-7
tests/conftest.py
tests/conftest.py
+0
-12
tests/data/test_config.yaml
tests/data/test_config.yaml
+0
-0
tests/test_utils.py
tests/test_utils.py
+11
-42
vllm/entrypoints/cli/serve.py
vllm/entrypoints/cli/serve.py
+9
-13
vllm/utils.py
vllm/utils.py
+9
-25
No files found.
docs/source/serving/openai_compatible_server.md
View file @
baec0d4d
...
...
@@ -184,7 +184,6 @@ For example:
```yaml
# config.yaml
model: meta-llama/Llama-3.1-8B-Instruct
host: "127.0.0.1"
port: 6379
uvicorn-log-level: "info"
...
...
@@ -193,13 +192,12 @@ uvicorn-log-level: "info"
To use the above config file:
```bash
vllm serve --config config.yaml
vllm serve
SOME_MODEL
--config config.yaml
```
:::{note}
In case an argument is supplied simultaneously using command line and the config file, the value from the command line will take precedence.
The order of priorities is `
command line > config file values > defaults
`.
e.g. `
vllm serve SOME_MODEL --config config.yaml
`, SOME_MODEL takes precedence over `
model
` in config file.
:::
## API Reference
...
...
tests/config/test_config_with_model.yaml
deleted
100644 → 0
View file @
c21b99b9
# Same as test_config.yaml but with model specified
model
:
config-model
port
:
12312
served_model_name
:
mymodel
tensor_parallel_size
:
2
trust_remote_code
:
true
multi_step_stream_outputs
:
false
tests/conftest.py
View file @
baec0d4d
...
...
@@ -1121,15 +1121,3 @@ def pytest_collection_modifyitems(config, items):
for
item
in
items
:
if
"optional"
in
item
.
keywords
:
item
.
add_marker
(
skip_optional
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
cli_config_file
():
"""Return the path to the CLI config file."""
return
os
.
path
.
join
(
_TEST_DIR
,
"config"
,
"test_config.yaml"
)
@
pytest
.
fixture
(
scope
=
"session"
)
def
cli_config_file_with_model
():
"""Return the path to the CLI config file with model."""
return
os
.
path
.
join
(
_TEST_DIR
,
"config"
,
"test_config_with_model.yaml"
)
tests/
config
/test_config.yaml
→
tests/
data
/test_config.yaml
View file @
baec0d4d
File moved
tests/test_utils.py
View file @
baec0d4d
...
...
@@ -8,7 +8,7 @@ from unittest.mock import patch
import
pytest
import
torch
from
vllm_test_utils
.monitor
import
monitor
from
vllm_test_utils
import
monitor
from
vllm.config
import
ParallelConfig
,
VllmConfig
,
set_current_vllm_config
from
vllm.utils
import
(
FlexibleArgumentParser
,
MemorySnapshot
,
...
...
@@ -140,8 +140,7 @@ def parser():
def
parser_with_config
():
parser
=
FlexibleArgumentParser
()
parser
.
add_argument
(
'serve'
)
parser
.
add_argument
(
'model_tag'
,
nargs
=
'?'
)
parser
.
add_argument
(
'--model'
,
type
=
str
)
parser
.
add_argument
(
'model_tag'
)
parser
.
add_argument
(
'--served-model-name'
,
type
=
str
)
parser
.
add_argument
(
'--config'
,
type
=
str
)
parser
.
add_argument
(
'--port'
,
type
=
int
)
...
...
@@ -197,29 +196,29 @@ def test_missing_required_argument(parser):
parser
.
parse_args
([])
def
test_cli_override_to_config
(
parser_with_config
,
cli_config_file
):
def
test_cli_override_to_config
(
parser_with_config
):
args
=
parser_with_config
.
parse_args
([
'serve'
,
'mymodel'
,
'--config'
,
cli
_config
_file
,
'serve'
,
'mymodel'
,
'--config'
,
'./data/test
_config
.yaml'
,
'--tensor-parallel-size'
,
'3'
])
assert
args
.
tensor_parallel_size
==
3
args
=
parser_with_config
.
parse_args
([
'serve'
,
'mymodel'
,
'--tensor-parallel-size'
,
'3'
,
'--config'
,
cli
_config
_file
'./data/test
_config
.yaml'
])
assert
args
.
tensor_parallel_size
==
3
assert
args
.
port
==
12312
args
=
parser_with_config
.
parse_args
([
'serve'
,
'mymodel'
,
'--tensor-parallel-size'
,
'3'
,
'--config'
,
cli
_config
_file
,
'--port'
,
'666'
'./data/test
_config
.yaml'
,
'--port'
,
'666'
])
assert
args
.
tensor_parallel_size
==
3
assert
args
.
port
==
666
def
test_config_args
(
parser_with_config
,
cli_config_file
):
def
test_config_args
(
parser_with_config
):
args
=
parser_with_config
.
parse_args
(
[
'serve'
,
'mymodel'
,
'--config'
,
cli
_config
_file
])
[
'serve'
,
'mymodel'
,
'--config'
,
'./data/test
_config
.yaml'
])
assert
args
.
tensor_parallel_size
==
2
assert
args
.
trust_remote_code
assert
not
args
.
multi_step_stream_outputs
...
...
@@ -241,9 +240,10 @@ def test_config_file(parser_with_config):
])
def
test_no_model_tag
(
parser_with_config
,
cli_config_file
):
def
test_no_model_tag
(
parser_with_config
):
with
pytest
.
raises
(
ValueError
):
parser_with_config
.
parse_args
([
'serve'
,
'--config'
,
cli_config_file
])
parser_with_config
.
parse_args
(
[
'serve'
,
'--config'
,
'./data/test_config.yaml'
])
# yapf: enable
...
...
@@ -476,34 +476,3 @@ def test_swap_dict_values(obj, key1, key2):
assert
obj
[
key1
]
==
original_obj
[
key2
]
else
:
assert
key1
not
in
obj
def
test_model_specification
(
parser_with_config
,
cli_config_file
,
cli_config_file_with_model
):
# Test model in CLI takes precedence over config
args
=
parser_with_config
.
parse_args
([
'serve'
,
'cli-model'
,
'--config'
,
cli_config_file_with_model
])
assert
args
.
model_tag
==
'cli-model'
assert
args
.
served_model_name
==
'mymodel'
# Test model from config file works
args
=
parser_with_config
.
parse_args
([
'serve'
,
'--config'
,
cli_config_file_with_model
])
assert
args
.
model
==
'config-model'
assert
args
.
served_model_name
==
'mymodel'
# Test no model specified anywhere raises error
with
pytest
.
raises
(
ValueError
,
match
=
"No model specified!"
):
parser_with_config
.
parse_args
([
'serve'
,
'--config'
,
cli_config_file
])
# Test other config values are preserved
args
=
parser_with_config
.
parse_args
([
'serve'
,
'cli-model'
,
'--config'
,
cli_config_file_with_model
])
assert
args
.
tensor_parallel_size
==
2
assert
args
.
trust_remote_code
is
True
assert
args
.
multi_step_stream_outputs
is
False
assert
args
.
port
==
12312
vllm/entrypoints/cli/serve.py
View file @
baec0d4d
...
...
@@ -21,16 +21,14 @@ class ServeSubcommand(CLISubcommand):
@
staticmethod
def
cmd
(
args
:
argparse
.
Namespace
)
->
None
:
# If model is specified in CLI (as positional arg), it takes precedence
if
hasattr
(
args
,
'model_tag'
)
and
args
.
model_tag
is
not
None
:
args
.
model
=
args
.
model_tag
# Otherwise use model from config (already in args.model)
# Check if we have a model specified somewhere
if
args
.
model
==
EngineArgs
.
model
:
# Still has default value
# The default value of `--model`
if
args
.
model
!=
EngineArgs
.
model
:
raise
ValueError
(
"With `vllm serve`, you should provide the model either as a "
"positional argument or in config file."
)
"With `vllm serve`, you should provide the model as a "
"positional argument instead of via the `--model` option."
)
# EngineArgs expects the model name to be passed as --model.
args
.
model
=
args
.
model_tag
uvloop
.
run
(
run_server
(
args
))
...
...
@@ -43,12 +41,10 @@ class ServeSubcommand(CLISubcommand):
serve_parser
=
subparsers
.
add_parser
(
"serve"
,
help
=
"Start the vLLM OpenAI Compatible API server"
,
usage
=
"vllm serve
[
model_tag
]
[options]"
)
usage
=
"vllm serve
<
model_tag
>
[options]"
)
serve_parser
.
add_argument
(
"model_tag"
,
type
=
str
,
nargs
=
'?'
,
help
=
"The model tag to serve "
"(optional if specified in config)"
)
help
=
"The model tag to serve"
)
serve_parser
.
add_argument
(
"--config"
,
type
=
str
,
...
...
vllm/utils.py
View file @
baec0d4d
...
...
@@ -1265,29 +1265,19 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
config_args
=
self
.
_load_config_file
(
file_path
)
# 0th index is for {serve,chat,complete}
#
optionally
followed by model_tag (only for serve)
# followed by model_tag (only for serve)
# followed by config args
# followed by rest of cli args.
# maintaining this order will enforce the precedence
# of cli > config > defaults
if
args
[
0
]
==
"serve"
:
model_in_cli
=
len
(
args
)
>
1
and
not
args
[
1
].
startswith
(
'-'
)
model_in_config
=
any
(
arg
==
'--model'
for
arg
in
config_args
)
if
not
model_in_cli
and
not
model_in_config
:
if
index
==
1
:
raise
ValueError
(
"No model specified! Please specify model either in "
"command-line arguments or in config file."
)
if
model_in_cli
:
# Model specified as positional arg, keep CLI version
"No model_tag specified! Please check your command-line"
" arguments."
)
args
=
[
args
[
0
]]
+
[
args
[
1
]
]
+
config_args
+
args
[
2
:
index
]
+
args
[
index
+
2
:]
else
:
# No model in CLI, use config if available
args
=
[
args
[
0
]
]
+
config_args
+
args
[
1
:
index
]
+
args
[
index
+
2
:]
else
:
args
=
[
args
[
0
]]
+
config_args
+
args
[
1
:
index
]
+
args
[
index
+
2
:]
...
...
@@ -1305,7 +1295,9 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
'--port': '12323',
'--tensor-parallel-size': '4'
]
"""
extension
:
str
=
file_path
.
split
(
'.'
)[
-
1
]
if
extension
not
in
(
'yaml'
,
'yml'
):
raise
ValueError
(
...
...
@@ -1330,15 +1322,7 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
if
isinstance
(
action
,
StoreBoolean
)
]
# Skip model from config if it's provided as positional argument
skip_model
=
(
hasattr
(
self
,
'_parsed_args'
)
and
self
.
_parsed_args
and
len
(
self
.
_parsed_args
)
>
1
and
self
.
_parsed_args
[
0
]
==
'serve'
and
not
self
.
_parsed_args
[
1
].
startswith
(
'-'
))
for
key
,
value
in
config
.
items
():
if
skip_model
and
key
==
'model'
:
continue
if
isinstance
(
value
,
bool
)
and
key
not
in
store_boolean_arguments
:
if
value
:
processed_args
.
append
(
'--'
+
key
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment