Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f2056f72
Unverified
Commit
f2056f72
authored
Nov 15, 2024
by
shangmingc
Committed by
GitHub
Nov 15, 2024
Browse files
[Misc] Fix some help info of arg_utils to improve readability (#10362)
parent
1d65ec7e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
12 deletions
+12
-12
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+12
-12
No files found.
vllm/engine/arg_utils.py
View file @
f2056f72
...
@@ -272,10 +272,10 @@ class EngineArgs:
...
@@ -272,10 +272,10 @@ class EngineArgs:
parser
.
add_argument
(
parser
.
add_argument
(
'--allowed-local-media-path'
,
'--allowed-local-media-path'
,
type
=
str
,
type
=
str
,
help
=
"Allowing API requests to read local images or videos"
help
=
"Allowing API requests to read local images or videos
"
"from directories specified by the server file system."
"from directories specified by the server file system.
"
"This is a security risk."
"This is a security risk.
"
"Should only be enabled in trusted environments"
)
"Should only be enabled in trusted environments
.
"
)
parser
.
add_argument
(
'--download-dir'
,
parser
.
add_argument
(
'--download-dir'
,
type
=
nullable_str
,
type
=
nullable_str
,
default
=
EngineArgs
.
download_dir
,
default
=
EngineArgs
.
download_dir
,
...
@@ -340,7 +340,7 @@ class EngineArgs:
...
@@ -340,7 +340,7 @@ class EngineArgs:
'scaling factors. This should generally be supplied, when '
'scaling factors. This should generally be supplied, when '
'KV cache dtype is FP8. Otherwise, KV cache scaling factors '
'KV cache dtype is FP8. Otherwise, KV cache scaling factors '
'default to 1.0, which may cause accuracy issues. '
'default to 1.0, which may cause accuracy issues. '
'FP8_E5M2 (without scaling) is only supported on cuda version'
'FP8_E5M2 (without scaling) is only supported on cuda version
'
'greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is instead '
'greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is instead '
'supported for common inference criteria.'
)
'supported for common inference criteria.'
)
parser
.
add_argument
(
'--max-model-len'
,
parser
.
add_argument
(
'--max-model-len'
,
...
@@ -446,9 +446,9 @@ class EngineArgs:
...
@@ -446,9 +446,9 @@ class EngineArgs:
'this argument can be seen as a virtual way to increase '
'this argument can be seen as a virtual way to increase '
'the GPU memory size. For example, if you have one 24 GB '
'the GPU memory size. For example, if you have one 24 GB '
'GPU and set this to 10, virtually you can think of it as '
'GPU and set this to 10, virtually you can think of it as '
'a 34 GB GPU. Then you can load a 13B model with BF16 weight,'
'a 34 GB GPU. Then you can load a 13B model with BF16 weight,
'
'which requires at least 26GB GPU memory. Note that this '
'which requires at least 26GB GPU memory. Note that this '
'requires fast CPU-GPU interconnect, as part of the model is'
'requires fast CPU-GPU interconnect, as part of the model is
'
'loaded from CPU memory to GPU memory on the fly in each '
'loaded from CPU memory to GPU memory on the fly in each '
'model forward pass.'
)
'model forward pass.'
)
parser
.
add_argument
(
parser
.
add_argument
(
...
@@ -468,7 +468,7 @@ class EngineArgs:
...
@@ -468,7 +468,7 @@ class EngineArgs:
type
=
int
,
type
=
int
,
default
=
None
,
default
=
None
,
help
=
'If specified, ignore GPU profiling result and use this number'
help
=
'If specified, ignore GPU profiling result and use this number'
'of GPU blocks. Used for testing preemption.'
)
'
of GPU blocks. Used for testing preemption.'
)
parser
.
add_argument
(
'--max-num-batched-tokens'
,
parser
.
add_argument
(
'--max-num-batched-tokens'
,
type
=
int
,
type
=
int
,
default
=
EngineArgs
.
max_num_batched_tokens
,
default
=
EngineArgs
.
max_num_batched_tokens
,
...
@@ -514,7 +514,7 @@ class EngineArgs:
...
@@ -514,7 +514,7 @@ class EngineArgs:
parser
.
add_argument
(
'--hf-overrides'
,
parser
.
add_argument
(
'--hf-overrides'
,
type
=
json
.
loads
,
type
=
json
.
loads
,
default
=
EngineArgs
.
hf_overrides
,
default
=
EngineArgs
.
hf_overrides
,
help
=
'Extra arguments for the HuggingFace config.'
help
=
'Extra arguments for the HuggingFace config.
'
'This should be a JSON string that will be '
'This should be a JSON string that will be '
'parsed into a dictionary.'
)
'parsed into a dictionary.'
)
parser
.
add_argument
(
'--enforce-eager'
,
parser
.
add_argument
(
'--enforce-eager'
,
...
@@ -572,7 +572,7 @@ class EngineArgs:
...
@@ -572,7 +572,7 @@ class EngineArgs:
'--mm-processor-kwargs'
,
'--mm-processor-kwargs'
,
default
=
None
,
default
=
None
,
type
=
json
.
loads
,
type
=
json
.
loads
,
help
=
(
'Overrides for the multimodal input mapping/processing,'
help
=
(
'Overrides for the multimodal input mapping/processing,
'
'e.g., image processor. For example: {"num_crops": 4}.'
))
'e.g., image processor. For example: {"num_crops": 4}.'
))
# LoRA related configs
# LoRA related configs
...
@@ -822,9 +822,9 @@ class EngineArgs:
...
@@ -822,9 +822,9 @@ class EngineArgs:
"of the provided names. The model name in the model "
"of the provided names. The model name in the model "
"field of a response will be the first name in this "
"field of a response will be the first name in this "
"list. If not specified, the model name will be the "
"list. If not specified, the model name will be the "
"same as the `--model` argument. Noted that this name(s)"
"same as the `--model` argument. Noted that this name(s)
"
"will also be used in `model_name` tag content of "
"will also be used in `model_name` tag content of "
"prometheus metrics, if multiple names provided, metrics"
"prometheus metrics, if multiple names provided, metrics
"
"tag will take the first one."
)
"tag will take the first one."
)
parser
.
add_argument
(
'--qlora-adapter-name-or-path'
,
parser
.
add_argument
(
'--qlora-adapter-name-or-path'
,
type
=
str
,
type
=
str
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment