Unverified commit 9832e557, authored by Rui Qiao and committed by GitHub
Browse files

[V1] Unify VLLM_ENABLE_V1_MULTIPROCESSING handling in RayExecutor (#11472)

parent 3f3e92e1
...@@ -127,11 +127,6 @@ def test_models_distributed( ...@@ -127,11 +127,6 @@ def test_models_distributed(
if attention_backend: if attention_backend:
os.environ["VLLM_ATTENTION_BACKEND"] = attention_backend os.environ["VLLM_ATTENTION_BACKEND"] = attention_backend
# Import VLLM_USE_V1 dynamically to handle patching
from vllm.envs import VLLM_USE_V1
if VLLM_USE_V1 and distributed_executor_backend != "mp":
os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
dtype = "half" dtype = "half"
max_tokens = 5 max_tokens = 5
......
...@@ -21,7 +21,6 @@ from vllm.v1.engine.core_client import EngineCoreClient ...@@ -21,7 +21,6 @@ from vllm.v1.engine.core_client import EngineCoreClient
from vllm.v1.engine.detokenizer import Detokenizer from vllm.v1.engine.detokenizer import Detokenizer
from vllm.v1.engine.processor import Processor from vllm.v1.engine.processor import Processor
from vllm.v1.executor.abstract import Executor from vllm.v1.executor.abstract import Executor
from vllm.v1.executor.ray_utils import initialize_ray_cluster
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -112,7 +111,6 @@ class LLMEngine: ...@@ -112,7 +111,6 @@ class LLMEngine:
distributed_executor_backend = ( distributed_executor_backend = (
vllm_config.parallel_config.distributed_executor_backend) vllm_config.parallel_config.distributed_executor_backend)
if distributed_executor_backend == "ray": if distributed_executor_backend == "ray":
initialize_ray_cluster(vllm_config.parallel_config)
from vllm.v1.executor.ray_executor import RayExecutor from vllm.v1.executor.ray_executor import RayExecutor
executor_class = RayExecutor executor_class = RayExecutor
elif distributed_executor_backend == "mp": elif distributed_executor_backend == "mp":
......
...@@ -8,7 +8,8 @@ from vllm.config import VllmConfig ...@@ -8,7 +8,8 @@ from vllm.config import VllmConfig
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import get_distributed_init_method, get_ip, get_open_port from vllm.utils import get_distributed_init_method, get_ip, get_open_port
from vllm.v1.executor.abstract import Executor from vllm.v1.executor.abstract import Executor
from vllm.v1.executor.ray_utils import RayWorkerWrapper, ray from vllm.v1.executor.ray_utils import (RayWorkerWrapper,
initialize_ray_cluster, ray)
from vllm.v1.outputs import ModelRunnerOutput from vllm.v1.outputs import ModelRunnerOutput
if ray is not None: if ray is not None:
...@@ -33,7 +34,9 @@ class RayExecutor(Executor): ...@@ -33,7 +34,9 @@ class RayExecutor(Executor):
if ray_usage != "1": if ray_usage != "1":
os.environ["RAY_USAGE_STATS_ENABLED"] = "0" os.environ["RAY_USAGE_STATS_ENABLED"] = "0"
initialize_ray_cluster(self.parallel_config)
placement_group = self.parallel_config.placement_group placement_group = self.parallel_config.placement_group
# Create the parallel GPU workers. # Create the parallel GPU workers.
self._init_workers_ray(placement_group) self._init_workers_ray(placement_group)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment