Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9cdba966
Unverified
Commit
9cdba966
authored
Nov 11, 2024
by
Russell Bryant
Committed by
GitHub
Nov 12, 2024
Browse files
[Doc] Update help text for `--distributed-executor-backend` (#10231)
Signed-off-by:
Russell Bryant
<
rbryant@redhat.com
>
parent
d1c6799b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
6 deletions
+14
-6
vllm/config.py
vllm/config.py
+6
-3
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+8
-3
No files found.
vllm/config.py
View file @
9cdba966
...
@@ -951,9 +951,12 @@ class ParallelConfig:
...
@@ -951,9 +951,12 @@ class ParallelConfig:
https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
placement_group: ray distributed model workers placement group.
placement_group: ray distributed model workers placement group.
distributed_executor_backend: Backend to use for distributed model
distributed_executor_backend: Backend to use for distributed model
workers, either "ray" or "mp" (multiprocessing). If either
workers, either "ray" or "mp" (multiprocessing). If the product
pipeline_parallel_size or tensor_parallel_size is greater than 1,
of pipeline_parallel_size and tensor_parallel_size is less than
will default to "ray" if Ray is installed or "mp" otherwise.
or equal to the number of GPUs available, "mp" will be used to
keep processing on a single host. Otherwise, this will default
to "ray" if Ray is installed and fail otherwise. Note that tpu
and hpu only support Ray for distributed inference.
"""
"""
def
__init__
(
def
__init__
(
...
...
vllm/engine/arg_utils.py
View file @
9cdba966
...
@@ -369,9 +369,14 @@ class EngineArgs:
...
@@ -369,9 +369,14 @@ class EngineArgs:
'--distributed-executor-backend'
,
'--distributed-executor-backend'
,
choices
=
[
'ray'
,
'mp'
],
choices
=
[
'ray'
,
'mp'
],
default
=
EngineArgs
.
distributed_executor_backend
,
default
=
EngineArgs
.
distributed_executor_backend
,
help
=
'Backend to use for distributed serving. When more than 1 GPU '
help
=
'Backend to use for distributed model '
'is used, will be automatically set to "ray" if installed '
'workers, either "ray" or "mp" (multiprocessing). If the product '
'or "mp" (multiprocessing) otherwise.'
)
'of pipeline_parallel_size and tensor_parallel_size is less than '
'or equal to the number of GPUs available, "mp" will be used to '
'keep processing on a single host. Otherwise, this will default '
'to "ray" if Ray is installed and fail otherwise. Note that tpu '
'and hpu only support Ray for distributed inference.'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--worker-use-ray'
,
'--worker-use-ray'
,
action
=
'store_true'
,
action
=
'store_true'
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment