Unverified Commit 0229c386 authored by FlorianJoncour, committed by GitHub

Better integration with Ray Serve (#1821)


Co-authored-by: FlorianJoncour <florian@zetta-sys.com>
parent a7b3e330
vllm/engine/llm_engine.py

@@ -7,7 +7,7 @@ from vllm.config import (CacheConfig, ModelConfig, ParallelConfig,
                          SchedulerConfig)
 from vllm.core.scheduler import Scheduler, SchedulerOutputs
 from vllm.engine.arg_utils import EngineArgs
-from vllm.engine.ray_utils import RayWorker, initialize_cluster, ray
+from vllm.engine.ray_utils import RayWorkerVllm, initialize_cluster, ray
 from vllm.logger import init_logger
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import SamplingParams
@@ -162,12 +162,12 @@ class LLMEngine:
                 continue
             worker = ray.remote(
                 num_cpus=0,
-                num_gpus=1,
+                num_gpus=self.cache_config.gpu_memory_utilization,
                 scheduling_strategy=PlacementGroupSchedulingStrategy(
                     placement_group=placement_group,
                     placement_group_capture_child_tasks=True),
                 **ray_remote_kwargs,
-            )(RayWorker).remote(self.model_config.trust_remote_code)
+            )(RayWorkerVllm).remote(self.model_config.trust_remote_code)
             self.workers.append(worker)
 
         # Initialize torch distributed process group for the workers.
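The substantive change in this hunk is the `num_gpus` request: instead of reserving a whole GPU per worker, each `RayWorkerVllm` actor now asks Ray for a fraction equal to `gpu_memory_utilization`, leaving headroom on the same device for other actors, such as a Ray Serve replica. Below is a minimal sketch of the fractional GPU scheduling this relies on; the `Worker` actor and the 0.9 fraction are illustrative, not part of vLLM.

```python
# Minimal sketch of fractional GPU scheduling in Ray (illustrative only).
# Requesting num_gpus=0.9 leaves 0.1 GPU of headroom on the same device
# for another actor, e.g. a Ray Serve replica driving this worker.
import os

import ray

ray.init()  # assumes a local Ray instance with at least one GPU


@ray.remote(num_cpus=0, num_gpus=0.9)
class Worker:
    def device(self) -> str:
        # Ray sets CUDA_VISIBLE_DEVICES before any actor code runs,
        # which is why vllm.worker.Worker is initialized lazily.
        return os.environ.get("CUDA_VISIBLE_DEVICES", "")


worker = Worker.remote()
print(ray.get(worker.device.remote()))
```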
vllm/engine/ray_utils.py

@@ -10,7 +10,7 @@ try:
     import ray
     from ray.air.util.torch_dist import TorchDistributedWorker
 
-    class RayWorker(TorchDistributedWorker):
+    class RayWorkerVllm(TorchDistributedWorker):
         """Ray wrapper for vllm.worker.Worker, allowing Worker to be
         lazily initialized after Ray sets CUDA_VISIBLE_DEVICES."""
@@ -36,7 +36,7 @@ except ImportError as e:
                    "`pip install ray pandas pyarrow`.")
     ray = None
     TorchDistributedWorker = None
-    RayWorker = None
+    RayWorkerVllm = None
 
 if TYPE_CHECKING:
     from ray.util.placement_group import PlacementGroup
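Together with the rename to `RayWorkerVllm` (which keeps the class from colliding with similarly named workers in a shared Ray application), the fractional GPU request is what makes it practical to colocate the engine with a Ray Serve replica. The sketch below shows one plausible wiring; `AsyncLLMEngine`, `AsyncEngineArgs`, `serve.deployment`, and `SamplingParams` are real APIs from this era of vLLM and Ray, but the deployment class, model choice, and GPU fractions are assumptions for illustration.

```python
# Hypothetical sketch of serving vLLM behind Ray Serve (the use case this
# commit targets). The exact wiring, model name, and GPU fractions are
# assumptions; adjust them to your cluster.
from ray import serve
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.sampling_params import SamplingParams
from vllm.utils import random_uuid


@serve.deployment(ray_actor_options={"num_gpus": 0.1})
class VLLMDeployment:
    def __init__(self, model: str) -> None:
        # The engine spawns RayWorkerVllm actors which, after this commit,
        # request only gpu_memory_utilization GPUs each, so they can share
        # a device with this replica.
        self.engine = AsyncLLMEngine.from_engine_args(
            AsyncEngineArgs(model=model,
                            worker_use_ray=True,
                            gpu_memory_utilization=0.85))

    async def __call__(self, prompt: str) -> str:
        params = SamplingParams(max_tokens=64)
        final_output = None
        # generate() yields incremental RequestOutputs; keep the last one.
        async for output in self.engine.generate(prompt, params, random_uuid()):
            final_output = output
        return final_output.outputs[0].text


app = VLLMDeployment.bind("facebook/opt-125m")
# serve.run(app)  # requires a running Ray cluster with GPU resources
```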