[V1] Add `RayExecutor` support for `AsyncLLM` (api server) (#11712)

fbf25645 · Kunshang Ji · GitHub · d1d49397 · fbf25645
Unverified Commit fbf25645 authored Jan 04, 2025 by Kunshang Ji Committed by GitHub Jan 04, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 6 additions and 1 deletion

vllm/v1/engine/async_llm.py vllm/v1/engine/async_llm.py +6 -1

No files found.
--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
@@ -22,6 +22,7 @@ from vllm.v1.engine.core_client import EngineCoreClient
 from vllm.v1.engine.detokenizer import Detokenizer
 from vllm.v1.engine.processor import Processor
 from vllm.v1.executor.abstract import Executor
+from vllm.v1.executor.ray_utils import initialize_ray_cluster
 logger = init_logger(__name__)
@@ -131,7 +132,11 @@ class AsyncLLM(EngineClient):
        executor_class: Type[Executor]
        distributed_executor_backend = (
            vllm_config.parallel_config.distributed_executor_backend)
-        if distributed_executor_backend == "mp":
+        if distributed_executor_backend == "ray":
+            initialize_ray_cluster(vllm_config.parallel_config)
+            from vllm.v1.executor.ray_executor import RayExecutor
+            executor_class = RayExecutor
+        elif distributed_executor_backend == "mp":
            from vllm.v1.executor.multiproc_executor import MultiprocExecutor
            executor_class = MultiprocExecutor
        else: