Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7e65477e
Unverified
Commit
7e65477e
authored
May 03, 2024
by
Michael Goin
Committed by
GitHub
May 03, 2024
Browse files
[Bugfix] Allow "None" or "" to be passed to CLI for string args that default to None (#4586)
parent
3521ba4f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
34 additions
and
25 deletions
+34
-25
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+19
-13
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/cli_args.py
+15
-12
No files found.
vllm/engine/arg_utils.py
View file @
7e65477e
...
...
@@ -11,6 +11,12 @@ from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from
vllm.utils
import
str_to_int_tuple
def nullable_str(val: str):
    """Convert a CLI string into ``None`` when it denotes "no value".

    Intended for use as an ``argparse`` ``type=`` callable on string
    options whose default is ``None``, so that the default can also be
    requested explicitly on the command line.

    Args:
        val: Raw string received from the command line.

    Returns:
        ``None`` if ``val`` is empty or is exactly the text ``"None"``
        (the comparison is case-sensitive); otherwise ``val`` unchanged.
    """
    # An empty argument (e.g. --tokenizer "") means "not set".
    if not val:
        return None
    # The literal text "None" is how a user spells the None default.
    if val == "None":
        return None
    return val
@
dataclass
class
EngineArgs
:
"""Arguments for vLLM engine."""
...
...
@@ -96,7 +102,7 @@ class EngineArgs:
help
=
'Name or path of the huggingface model to use.'
)
parser
.
add_argument
(
'--tokenizer'
,
type
=
str
,
type
=
nullable_
str
,
default
=
EngineArgs
.
tokenizer
,
help
=
'Name or path of the huggingface tokenizer to use.'
)
parser
.
add_argument
(
...
...
@@ -105,21 +111,21 @@ class EngineArgs:
help
=
'Skip initialization of tokenizer and detokenizer'
)
parser
.
add_argument
(
'--revision'
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
'The specific model version to use. It can be a branch '
'name, a tag name, or a commit id. If unspecified, will use '
'the default version.'
)
parser
.
add_argument
(
'--code-revision'
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
'The specific revision to use for the model code on '
'Hugging Face Hub. It can be a branch name, a tag name, or a '
'commit id. If unspecified, will use the default version.'
)
parser
.
add_argument
(
'--tokenizer-revision'
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
'The specific tokenizer version to use. It can be a branch '
'name, a tag name, or a commit id. If unspecified, will use '
...
...
@@ -136,7 +142,7 @@ class EngineArgs:
action
=
'store_true'
,
help
=
'Trust remote code from huggingface.'
)
parser
.
add_argument
(
'--download-dir'
,
type
=
str
,
type
=
nullable_
str
,
default
=
EngineArgs
.
download_dir
,
help
=
'Directory to download and load the weights, '
'default to the default cache dir of '
...
...
@@ -187,7 +193,7 @@ class EngineArgs:
'supported for common inference criteria.'
)
parser
.
add_argument
(
'--quantization-param-path'
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
'Path to the JSON file containing the KV cache '
'scaling factors. This should generally be supplied, when '
...
...
@@ -304,7 +310,7 @@ class EngineArgs:
# Quantization settings.
parser
.
add_argument
(
'--quantization'
,
'-q'
,
type
=
str
,
type
=
nullable_
str
,
choices
=
[
*
QUANTIZATION_METHODS
,
None
],
default
=
EngineArgs
.
quantization
,
help
=
'Method used to quantize the weights. If '
...
...
@@ -349,7 +355,7 @@ class EngineArgs:
'asynchronous tokenization. Ignored '
'if tokenizer_pool_size is 0.'
)
parser
.
add_argument
(
'--tokenizer-pool-extra-config'
,
type
=
str
,
type
=
nullable_
str
,
default
=
EngineArgs
.
tokenizer_pool_extra_config
,
help
=
'Extra config for tokenizer pool. '
'This should be a JSON string that will be '
...
...
@@ -404,7 +410,7 @@ class EngineArgs:
# Related to Vision-language models such as llava
parser
.
add_argument
(
'--image-input-type'
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
choices
=
[
t
.
name
.
lower
()
for
t
in
VisionLanguageConfig
.
ImageInputType
...
...
@@ -417,7 +423,7 @@ class EngineArgs:
help
=
(
'Input id for image token.'
))
parser
.
add_argument
(
'--image-input-shape'
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
(
'The biggest image input shape (worst for memory footprint) '
'given an input type. Only used for vLLM
\'
s profile_run.'
))
...
...
@@ -440,7 +446,7 @@ class EngineArgs:
parser
.
add_argument
(
'--speculative-model'
,
type
=
str
,
type
=
nullable_
str
,
default
=
EngineArgs
.
speculative_model
,
help
=
'The name of the draft model to be used in speculative decoding.'
)
...
...
@@ -454,7 +460,7 @@ class EngineArgs:
parser
.
add_argument
(
'--speculative-max-model-len'
,
type
=
str
,
type
=
int
,
default
=
EngineArgs
.
speculative_max_model_len
,
help
=
'The maximum sequence length supported by the '
'draft model. Sequences over this length will skip '
...
...
@@ -475,7 +481,7 @@ class EngineArgs:
'decoding.'
)
parser
.
add_argument
(
'--model-loader-extra-config'
,
type
=
str
,
type
=
nullable_
str
,
default
=
EngineArgs
.
model_loader_extra_config
,
help
=
'Extra config for model loader. '
'This will be passed to the model loader '
...
...
vllm/entrypoints/openai/cli_args.py
View file @
7e65477e
...
...
@@ -8,7 +8,7 @@ import argparse
import
json
import
ssl
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
nullable_str
from
vllm.entrypoints.openai.serving_engine
import
LoRAModulePath
...
...
@@ -25,7 +25,10 @@ class LoRAParserAction(argparse.Action):
def
make_arg_parser
():
parser
=
argparse
.
ArgumentParser
(
description
=
"vLLM OpenAI-Compatible RESTful API server."
)
parser
.
add_argument
(
"--host"
,
type
=
str
,
default
=
None
,
help
=
"host name"
)
parser
.
add_argument
(
"--host"
,
type
=
nullable_str
,
default
=
None
,
help
=
"host name"
)
parser
.
add_argument
(
"--port"
,
type
=
int
,
default
=
8000
,
help
=
"port number"
)
parser
.
add_argument
(
"--uvicorn-log-level"
,
...
...
@@ -49,13 +52,13 @@ def make_arg_parser():
default
=
[
"*"
],
help
=
"allowed headers"
)
parser
.
add_argument
(
"--api-key"
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
"If provided, the server will require this key "
"to be presented in the header."
)
parser
.
add_argument
(
"--served-model-name"
,
nargs
=
"+"
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
"The model name(s) used in the API. If multiple "
"names are provided, the server will respond to any "
...
...
@@ -65,33 +68,33 @@ def make_arg_parser():
"same as the `--model` argument."
)
parser
.
add_argument
(
"--lora-modules"
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
nargs
=
'+'
,
action
=
LoRAParserAction
,
help
=
"LoRA module configurations in the format name=path. "
"Multiple modules can be specified."
)
parser
.
add_argument
(
"--chat-template"
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
"The file path to the chat template, "
"or the template in single-line form "
"for the specified model"
)
parser
.
add_argument
(
"--response-role"
,
type
=
str
,
type
=
nullable_
str
,
default
=
"assistant"
,
help
=
"The role name to return if "
"`request.add_generation_prompt=true`."
)
parser
.
add_argument
(
"--ssl-keyfile"
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
"The file path to the SSL key file"
)
parser
.
add_argument
(
"--ssl-certfile"
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
"The file path to the SSL cert file"
)
parser
.
add_argument
(
"--ssl-ca-certs"
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
"The CA certificates file"
)
parser
.
add_argument
(
...
...
@@ -102,12 +105,12 @@ def make_arg_parser():
)
parser
.
add_argument
(
"--root-path"
,
type
=
str
,
type
=
nullable_
str
,
default
=
None
,
help
=
"FastAPI root_path when app is behind a path based routing proxy"
)
parser
.
add_argument
(
"--middleware"
,
type
=
str
,
type
=
nullable_
str
,
action
=
"append"
,
default
=
[],
help
=
"Additional ASGI middleware to apply to the app. "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment