Unverified commit fbf25645, authored by Kunshang Ji, committed by GitHub
Browse files

[V1] Add `RayExecutor` support for `AsyncLLM` (api server) (#11712)

parent d1d49397
...@@ -22,6 +22,7 @@ from vllm.v1.engine.core_client import EngineCoreClient ...@@ -22,6 +22,7 @@ from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.detokenizer import Detokenizer from vllm.v1.engine.detokenizer import Detokenizer
from vllm.v1.engine.processor import Processor from vllm.v1.engine.processor import Processor
from vllm.v1.executor.abstract import Executor from vllm.v1.executor.abstract import Executor
from vllm.v1.executor.ray_utils import initialize_ray_cluster
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -131,7 +132,11 @@ class AsyncLLM(EngineClient): ...@@ -131,7 +132,11 @@ class AsyncLLM(EngineClient):
executor_class: Type[Executor] executor_class: Type[Executor]
distributed_executor_backend = ( distributed_executor_backend = (
vllm_config.parallel_config.distributed_executor_backend) vllm_config.parallel_config.distributed_executor_backend)
if distributed_executor_backend == "mp": if distributed_executor_backend == "ray":
initialize_ray_cluster(vllm_config.parallel_config)
from vllm.v1.executor.ray_executor import RayExecutor
executor_class = RayExecutor
elif distributed_executor_backend == "mp":
from vllm.v1.executor.multiproc_executor import MultiprocExecutor from vllm.v1.executor.multiproc_executor import MultiprocExecutor
executor_class = MultiprocExecutor executor_class = MultiprocExecutor
else: else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment