"vllm/vscode:/vscode.git/clone" did not exist on "385eeae9bc5a609026c6e177076fa99e36f41263"
Unverified commit 90d6cf92, authored by Xingyu Liu, committed by GitHub
Browse files

[BugFix][MM]support VLLM_RANDOMIZE_DP_DUMMY_INPUTS (#30472)


Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
parent cf3eacfe
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import functools
import gc import gc
import itertools import itertools
import time import time
...@@ -3892,19 +3893,21 @@ class GPUModelRunner( ...@@ -3892,19 +3893,21 @@ class GPUModelRunner(
return {} return {}
@contextmanager @contextmanager
def maybe_randomize_inputs(self, input_ids: torch.Tensor): def maybe_randomize_inputs(
self, input_ids: torch.Tensor | None, inputs_embeds: torch.Tensor | None
):
""" """
Randomize input_ids if VLLM_RANDOMIZE_DP_DUMMY_INPUTS is set. Randomize input_ids if VLLM_RANDOMIZE_DP_DUMMY_INPUTS is set.
This is to help balance expert-selection This is to help balance expert-selection
- during profile_run - during profile_run
- during DP rank dummy run - during DP rank dummy run
""" """
dp_size = self.vllm_config.parallel_config.data_parallel_size dp_size = self.vllm_config.parallel_config.data_parallel_size
randomize_inputs = envs.VLLM_RANDOMIZE_DP_DUMMY_INPUTS and dp_size > 1 randomize_inputs = envs.VLLM_RANDOMIZE_DP_DUMMY_INPUTS and dp_size > 1
if not randomize_inputs: if not randomize_inputs:
yield yield
else: elif input_ids is not None:
import functools
@functools.cache @functools.cache
def rand_input_ids() -> torch.Tensor: def rand_input_ids() -> torch.Tensor:
...@@ -3912,13 +3915,27 @@ class GPUModelRunner( ...@@ -3912,13 +3915,27 @@ class GPUModelRunner(
self.input_ids.gpu, self.input_ids.gpu,
low=0, low=0,
high=self.model_config.get_vocab_size(), high=self.model_config.get_vocab_size(),
dtype=input_ids.dtype,
) )
logger.debug_once("Randomizing dummy data for DP Rank") logger.debug_once("Randomizing dummy input_ids for DP Rank")
input_ids.copy_(rand_input_ids()[: input_ids.size(0)], non_blocking=True) input_ids.copy_(rand_input_ids()[: input_ids.size(0)], non_blocking=True)
yield yield
input_ids.fill_(0) input_ids.fill_(0)
else:
@functools.cache
def rand_inputs_embeds() -> torch.Tensor:
return torch.randn_like(
self.inputs_embeds.gpu,
)
assert inputs_embeds is not None
logger.debug_once("Randomizing dummy inputs_embeds for DP Rank")
inputs_embeds.copy_(
rand_inputs_embeds()[: inputs_embeds.size(0)], non_blocking=True
)
yield
inputs_embeds.fill_(0)
def _get_mm_dummy_batch( def _get_mm_dummy_batch(
self, self,
...@@ -4167,7 +4184,7 @@ class GPUModelRunner( ...@@ -4167,7 +4184,7 @@ class GPUModelRunner(
num_tokens_across_dp[:] = num_tokens_padded num_tokens_across_dp[:] = num_tokens_padded
with ( with (
self.maybe_randomize_inputs(input_ids), self.maybe_randomize_inputs(input_ids, inputs_embeds),
set_forward_context( set_forward_context(
attn_metadata, attn_metadata,
self.vllm_config, self.vllm_config,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment