Unverified commit 28b3a1c7, authored by Tyler Michael Smith and committed by GitHub
Browse files

[V1] Multiprocessing Tensor Parallel Support for v1 (#9856)


Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
parent bc192a2b
...@@ -15,6 +15,7 @@ from vllm.logger import init_logger ...@@ -15,6 +15,7 @@ from vllm.logger import init_logger
from vllm.model_executor import set_random_seed from vllm.model_executor import set_random_seed
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, get_dtype_size from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, get_dtype_size
from vllm.v1.core.scheduler import SchedulerOutput
from vllm.v1.outputs import ModelRunnerOutput from vllm.v1.outputs import ModelRunnerOutput
from vllm.v1.worker.gpu_model_runner import GPUModelRunner from vllm.v1.worker.gpu_model_runner import GPUModelRunner
...@@ -56,7 +57,6 @@ class Worker: ...@@ -56,7 +57,6 @@ class Worker:
from vllm.utils import init_cached_hf_modules from vllm.utils import init_cached_hf_modules
init_cached_hf_modules() init_cached_hf_modules()
self.model_runner = GPUModelRunner(vllm_config)
# Torch profiler. Enabled and configured through env vars: # Torch profiler. Enabled and configured through env vars:
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
if envs.VLLM_TORCH_PROFILER_DIR: if envs.VLLM_TORCH_PROFILER_DIR:
...@@ -103,6 +103,9 @@ class Worker: ...@@ -103,6 +103,9 @@ class Worker:
# Set random seed. # Set random seed.
set_random_seed(self.model_config.seed) set_random_seed(self.model_config.seed)
# Construct the model runner
self.model_runner = GPUModelRunner(self.vllm_config, self.device)
def load_model(self) -> None: def load_model(self) -> None:
self.model_runner.load_model() self.model_runner.load_model()
...@@ -198,7 +201,7 @@ class Worker: ...@@ -198,7 +201,7 @@ class Worker:
scheduler_output: "SchedulerOutput", scheduler_output: "SchedulerOutput",
) -> ModelRunnerOutput: ) -> ModelRunnerOutput:
output = self.model_runner.execute_model(scheduler_output) output = self.model_runner.execute_model(scheduler_output)
# TODO(woosuk): Send the output to the engine process. return output if self.rank == 0 else None
return output return output
def profile(self, is_start=True): def profile(self, is_start=True):
...@@ -209,6 +212,10 @@ class Worker: ...@@ -209,6 +212,10 @@ class Worker:
else: else:
self.profiler.stop() self.profiler.stop()
def check_health(self) -> None:
# Health probe for this worker. No checks are performed: the worker is
# considered healthy as long as it is running, so being able to execute
# this call at all is the signal. Returns None immediately.
return
def init_worker_distributed_environment( def init_worker_distributed_environment(
parallel_config: ParallelConfig, parallel_config: ParallelConfig,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment