Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
26df46ee
Unverified
Commit
26df46ee
authored
Mar 29, 2025
by
Reid
Committed by
GitHub
Mar 28, 2025
Browse files
[Misc] cli auto show default value (#15582)
Signed-off-by:
reidliu41
<
reid201711@gmail.com
>
parent
c3f687ac
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
22 deletions
+11
-22
vllm/benchmarks/serve.py
vllm/benchmarks/serve.py
+1
-3
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+8
-17
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/cli_args.py
+1
-1
vllm/utils.py
vllm/utils.py
+1
-1
No files found.
vllm/benchmarks/serve.py
View file @
26df46ee
...
...
@@ -726,15 +726,13 @@ def add_cli_args(parser: argparse.ArgumentParser):
default
=
"ttft,tpot,itl"
,
help
=
"Comma-seperated list of selected metrics to report percentils. "
"This argument specifies the metrics to report percentiles. "
"Allowed metric names are
\"
ttft
\"
,
\"
tpot
\"
,
\"
itl
\"
,
\"
e2el
\"
. "
"Default value is
\"
ttft,tpot,itl
\"
."
)
"Allowed metric names are
\"
ttft
\"
,
\"
tpot
\"
,
\"
itl
\"
,
\"
e2el
\"
. "
)
parser
.
add_argument
(
"--metric-percentiles"
,
type
=
str
,
default
=
"99"
,
help
=
"Comma-seperated list of percentiles for selected metrics. "
"To report 25-th, 50-th, and 75-th percentiles, use
\"
25,50,75
\"
. "
"Default value is
\"
99
\"
. "
"Use
\"
--percentile-metrics
\"
to select metrics."
,
)
parser
.
add_argument
(
...
...
vllm/engine/arg_utils.py
View file @
26df46ee
...
...
@@ -322,9 +322,7 @@ class EngineArgs:
parser
.
add_argument
(
'--download-dir'
,
type
=
nullable_str
,
default
=
EngineArgs
.
download_dir
,
help
=
'Directory to download and load the weights, '
'default to the default cache dir of '
'huggingface.'
)
help
=
'Directory to download and load the weights.'
)
parser
.
add_argument
(
'--load-format'
,
type
=
str
,
...
...
@@ -399,8 +397,7 @@ class EngineArgs:
'Valid backend values are "xgrammar", "guidance", and "auto". '
'With "auto", we will make opinionated choices based on request'
'contents and what the backend libraries currently support, so '
'the behavior is subject to change in each release. '
'The default is xgrammar.'
)
'the behavior is subject to change in each release.'
)
parser
.
add_argument
(
'--logits-processor-pattern'
,
type
=
nullable_str
,
...
...
@@ -493,8 +490,7 @@ class EngineArgs:
default
=
EngineArgs
.
prefix_caching_hash_algo
,
help
=
"Set the hash algorithm for prefix caching. "
"Options are 'builtin' (Python's built-in hash) or 'sha256' "
"(collision resistant but with certain overheads). Defaults "
"to 'builtin'."
,
"(collision resistant but with certain overheads)."
,
)
parser
.
add_argument
(
'--disable-sliding-window'
,
action
=
'store_true'
,
...
...
@@ -568,9 +564,7 @@ class EngineArgs:
type
=
int
,
default
=
EngineArgs
.
max_num_partial_prefills
,
help
=
"For chunked prefill, the max number of concurrent
\
partial prefills."
"Defaults to 1"
,
)
partial prefills."
)
parser
.
add_argument
(
"--max-long-partial-prefills"
,
type
=
int
,
...
...
@@ -579,15 +573,13 @@ class EngineArgs:
"than --long-prefill-token-threshold that will be prefilled "
"concurrently. Setting this less than --max-num-partial-prefills "
"will allow shorter prompts to jump the queue in front of longer "
"prompts in some cases, improving latency.
Defaults to 1.
"
)
"prompts in some cases, improving latency."
)
parser
.
add_argument
(
"--long-prefill-token-threshold"
,
type
=
float
,
default
=
EngineArgs
.
long_prefill_token_threshold
,
help
=
"For chunked prefill, a request is considered long if the "
"prompt is longer than this number of tokens. Defaults to 4%% of "
"the model's context length."
,
)
"prompt is longer than this number of tokens."
)
parser
.
add_argument
(
'--max-num-seqs'
,
type
=
int
,
default
=
EngineArgs
.
max_num_seqs
,
...
...
@@ -739,8 +731,7 @@ class EngineArgs:
type
=
int
,
default
=
EngineArgs
.
max_cpu_loras
,
help
=
(
'Maximum number of LoRAs to store in CPU memory. '
'Must be >= than max_loras. '
'Defaults to max_loras.'
))
'Must be >= than max_loras.'
))
parser
.
add_argument
(
'--fully-sharded-loras'
,
action
=
'store_true'
,
...
...
@@ -894,7 +885,7 @@ class EngineArgs:
help
=
'Set the lower bound threshold for the posterior '
'probability of a token to be accepted. This threshold is '
'used by the TypicalAcceptanceSampler to make sampling decisions '
'during speculative decoding.
Defaults to 0.09
'
)
'during speculative decoding.'
)
parser
.
add_argument
(
'--typical-acceptance-sampler-posterior-alpha'
,
...
...
vllm/entrypoints/openai/cli_args.py
View file @
26df46ee
...
...
@@ -247,7 +247,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
default
=
None
,
help
=
'Max number of prompt characters or prompt '
'ID numbers being printed in log.'
'
\n\n
Default: U
nlimited'
)
'
The default of None means u
nlimited
.
'
)
parser
.
add_argument
(
"--disable-fastapi-docs"
,
...
...
vllm/utils.py
View file @
26df46ee
...
...
@@ -1212,7 +1212,7 @@ class StoreBoolean(argparse.Action):
"Expected 'true' or 'false'."
)
class
SortedHelpFormatter
(
argparse
.
HelpFormatter
):
class
SortedHelpFormatter
(
argparse
.
ArgumentDefaults
HelpFormatter
):
"""SortedHelpFormatter that sorts arguments by their option strings."""
def
add_arguments
(
self
,
actions
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment