"vscode:/vscode.git/clone" did not exist on "4eade17a6e1baf6bc5c71daac7fc3ac595c378a2"
Unverified commit b706d898, authored by Cody Yu, committed by GitHub
Browse files

[Bugfix][V1][PP] Only warmup sampler at last PP rank (#14643)


Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
parent 863d315c
......@@ -14,6 +14,7 @@ from vllm.device_allocator.cumem import CuMemAllocator
from vllm.distributed import (ensure_model_parallel_initialized,
init_distributed_environment,
set_custom_all_reduce)
+from vllm.distributed.parallel_state import get_pp_group
from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
......@@ -219,8 +220,10 @@ class Worker(WorkerBase):
# fragmentation issue.
# NOTE: This is called after `capture_model` on purpose to prevent
# memory buffers from being cleared by `torch.cuda.empty_cache`.
+if get_pp_group().is_last_rank:
 try:
-max_num_reqs = min(self.scheduler_config.max_num_seqs,
+max_num_reqs = min(
+self.scheduler_config.max_num_seqs,
 self.scheduler_config.max_num_batched_tokens)
self.model_runner._dummy_sampler_run(
hidden_states=self.model_runner._dummy_run(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment