[Doc] Update help text for `--distributed-executor-backend` (#10231)

Signed-off-by: Russell Bryant <rbryant@redhat.com>

[Doc] Update help text for `--distributed-executor-backend` (#10231)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
9cdba966 · Russell Bryant · GitHub · d1c6799b · 9cdba966 · 9cdba966
Unverified commit 9cdba966, authored Nov 11, 2024 by Russell Bryant; committed by GitHub on Nov 12, 2024.
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 6 deletions

vllm/config.py vllm/config.py +6 -3

vllm/engine/arg_utils.py vllm/engine/arg_utils.py +8 -3

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -951,9 +951,12 @@ class ParallelConfig:
            https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.
        placement_group: ray distributed model workers placement group.
        distributed_executor_backend: Backend to use for distributed model
-            workers, either "ray" or "mp" (multiprocessing). If either
-            pipeline_parallel_size or tensor_parallel_size is greater than 1,
-            will default to "ray" if Ray is installed or "mp" otherwise.
+            workers, either "ray" or "mp" (multiprocessing). If the product
+            of pipeline_parallel_size and tensor_parallel_size is less than
+            or equal to the number of GPUs available, "mp" will be used to
+            keep processing on a single host. Otherwise, this will default
+            to "ray" if Ray is installed and fail otherwise. Note that TPU
+            and HPU only support Ray for distributed inference.
    """

    def __init__(

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -369,9 +369,14 @@ class EngineArgs:
            '--distributed-executor-backend',
            choices=['ray', 'mp'],
            default=EngineArgs.distributed_executor_backend,
-            help='Backend to use for distributed serving. When more than 1 GPU '
-            'is used, will be automatically set to "ray" if installed '
-            'or "mp" (multiprocessing) otherwise.')
+            help='Backend to use for distributed model '
+            'workers, either "ray" or "mp" (multiprocessing). If the product '
+            'of pipeline_parallel_size and tensor_parallel_size is less than '
+            'or equal to the number of GPUs available, "mp" will be used to '
+            'keep processing on a single host. Otherwise, this will default '
+            'to "ray" if Ray is installed and fail otherwise. Note that tpu '
+            'and hpu only support Ray for distributed inference.')
+
        parser.add_argument(
            '--worker-use-ray',
            action='store_true',