Unverified Commit 9cdba966, authored by Russell Bryant, committed by GitHub
Browse files

[Doc] Update help text for `--distributed-executor-backend` (#10231)


Signed-off-by: Russell Bryant <rbryant@redhat.com>
parent d1c6799b
...@@ -951,9 +951,12 @@ class ParallelConfig: ...@@ -951,9 +951,12 @@ class ParallelConfig:
https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler. https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
placement_group: ray distributed model workers placement group. placement_group: ray distributed model workers placement group.
distributed_executor_backend: Backend to use for distributed model distributed_executor_backend: Backend to use for distributed model
workers, either "ray" or "mp" (multiprocessing). If either workers, either "ray" or "mp" (multiprocessing). If the product
pipeline_parallel_size or tensor_parallel_size is greater than 1, of pipeline_parallel_size and tensor_parallel_size is less than
will default to "ray" if Ray is installed or "mp" otherwise. or equal to the number of GPUs available, "mp" will be used to
keep processing on a single host. Otherwise, this will default
to "ray" if Ray is installed and fail otherwise. Note that tpu
and hpu only support Ray for distributed inference.
""" """
def __init__( def __init__(
......
...@@ -369,9 +369,14 @@ class EngineArgs: ...@@ -369,9 +369,14 @@ class EngineArgs:
'--distributed-executor-backend', '--distributed-executor-backend',
choices=['ray', 'mp'], choices=['ray', 'mp'],
default=EngineArgs.distributed_executor_backend, default=EngineArgs.distributed_executor_backend,
help='Backend to use for distributed serving. When more than 1 GPU ' help='Backend to use for distributed model '
'is used, will be automatically set to "ray" if installed ' 'workers, either "ray" or "mp" (multiprocessing). If the product '
'or "mp" (multiprocessing) otherwise.') 'of pipeline_parallel_size and tensor_parallel_size is less than '
'or equal to the number of GPUs available, "mp" will be used to '
'keep processing on a single host. Otherwise, this will default '
'to "ray" if Ray is installed and fail otherwise. Note that tpu '
'and hpu only support Ray for distributed inference.')
parser.add_argument( parser.add_argument(
'--worker-use-ray', '--worker-use-ray',
action='store_true', action='store_true',
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment