Unverified commit b4bb5f31, authored by maang-h, committed by GitHub
Browse files

[Core] Remove unused `num_tokens` parameter from `_init_model_kwargs` (#31517)


Signed-off-by: maang <maang_h@163.com>
parent 70e1acef
...@@ -684,7 +684,7 @@ class GPUModelRunner( ...@@ -684,7 +684,7 @@ class GPUModelRunner(
with_numpy=numpy, with_numpy=numpy,
) )
def _init_model_kwargs(self, num_tokens: int): def _init_model_kwargs(self):
model_kwargs = dict[str, Any]() model_kwargs = dict[str, Any]()
if not self.is_pooling_model: if not self.is_pooling_model:
...@@ -2579,7 +2579,7 @@ class GPUModelRunner( ...@@ -2579,7 +2579,7 @@ class GPUModelRunner(
input_ids, inputs_embeds = self._prepare_mm_inputs(num_input_tokens) input_ids, inputs_embeds = self._prepare_mm_inputs(num_input_tokens)
model_kwargs = { model_kwargs = {
**self._init_model_kwargs(num_scheduled_tokens), **self._init_model_kwargs(),
**self._extract_mm_kwargs(scheduler_output), **self._extract_mm_kwargs(scheduler_output),
} }
elif self.enable_prompt_embeds and is_first_rank: elif self.enable_prompt_embeds and is_first_rank:
...@@ -2607,7 +2607,7 @@ class GPUModelRunner( ...@@ -2607,7 +2607,7 @@ class GPUModelRunner(
self.inputs_embeds.gpu[token_ids_idx] = tokens_to_embeds self.inputs_embeds.gpu[token_ids_idx] = tokens_to_embeds
inputs_embeds = self.inputs_embeds.gpu[:num_input_tokens] inputs_embeds = self.inputs_embeds.gpu[:num_input_tokens]
model_kwargs = self._init_model_kwargs(num_input_tokens) model_kwargs = self._init_model_kwargs()
input_ids = None input_ids = None
else: else:
# For text-only models, we use token ids as input. # For text-only models, we use token ids as input.
...@@ -2616,7 +2616,7 @@ class GPUModelRunner( ...@@ -2616,7 +2616,7 @@ class GPUModelRunner(
# then the embedding layer is not included in the CUDA graph. # then the embedding layer is not included in the CUDA graph.
input_ids = self.input_ids.gpu[:num_input_tokens] input_ids = self.input_ids.gpu[:num_input_tokens]
inputs_embeds = None inputs_embeds = None
model_kwargs = self._init_model_kwargs(num_input_tokens) model_kwargs = self._init_model_kwargs()
if self.uses_mrope: if self.uses_mrope:
positions = self.mrope_positions.gpu[:, :num_input_tokens] positions = self.mrope_positions.gpu[:, :num_input_tokens]
...@@ -4293,7 +4293,7 @@ class GPUModelRunner( ...@@ -4293,7 +4293,7 @@ class GPUModelRunner(
): ):
# Make sure padding doesn't exceed max_num_tokens # Make sure padding doesn't exceed max_num_tokens
assert num_tokens_padded <= self.max_num_tokens assert num_tokens_padded <= self.max_num_tokens
model_kwargs = self._init_model_kwargs(num_tokens_padded) model_kwargs = self._init_model_kwargs()
if self.supports_mm_inputs and not self.model_config.is_encoder_decoder: if self.supports_mm_inputs and not self.model_config.is_encoder_decoder:
input_ids, inputs_embeds = self._prepare_mm_inputs(num_tokens_padded) input_ids, inputs_embeds = self._prepare_mm_inputs(num_tokens_padded)
...@@ -4304,7 +4304,7 @@ class GPUModelRunner( ...@@ -4304,7 +4304,7 @@ class GPUModelRunner(
elif self.enable_prompt_embeds: elif self.enable_prompt_embeds:
input_ids = None input_ids = None
inputs_embeds = self.inputs_embeds.gpu[:num_tokens_padded] inputs_embeds = self.inputs_embeds.gpu[:num_tokens_padded]
model_kwargs = self._init_model_kwargs(num_tokens_padded) model_kwargs = self._init_model_kwargs()
else: else:
input_ids = self.input_ids.gpu[:num_tokens_padded] input_ids = self.input_ids.gpu[:num_tokens_padded]
inputs_embeds = None inputs_embeds = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment