Unverified commit b4bb5f31, authored by maang-h, committed by GitHub
Browse files

[Core] Remove unused `num_tokens` parameter from `_init_model_kwargs` (#31517)


Signed-off-by: maang <maang_h@163.com>
parent 70e1acef
......@@ -684,7 +684,7 @@ class GPUModelRunner(
with_numpy=numpy,
)
def _init_model_kwargs(self, num_tokens: int):
def _init_model_kwargs(self):
model_kwargs = dict[str, Any]()
if not self.is_pooling_model:
......@@ -2579,7 +2579,7 @@ class GPUModelRunner(
input_ids, inputs_embeds = self._prepare_mm_inputs(num_input_tokens)
model_kwargs = {
**self._init_model_kwargs(num_scheduled_tokens),
**self._init_model_kwargs(),
**self._extract_mm_kwargs(scheduler_output),
}
elif self.enable_prompt_embeds and is_first_rank:
......@@ -2607,7 +2607,7 @@ class GPUModelRunner(
self.inputs_embeds.gpu[token_ids_idx] = tokens_to_embeds
inputs_embeds = self.inputs_embeds.gpu[:num_input_tokens]
model_kwargs = self._init_model_kwargs(num_input_tokens)
model_kwargs = self._init_model_kwargs()
input_ids = None
else:
# For text-only models, we use token ids as input.
......@@ -2616,7 +2616,7 @@ class GPUModelRunner(
# then the embedding layer is not included in the CUDA graph.
input_ids = self.input_ids.gpu[:num_input_tokens]
inputs_embeds = None
model_kwargs = self._init_model_kwargs(num_input_tokens)
model_kwargs = self._init_model_kwargs()
if self.uses_mrope:
positions = self.mrope_positions.gpu[:, :num_input_tokens]
......@@ -4293,7 +4293,7 @@ class GPUModelRunner(
):
# Make sure padding doesn't exceed max_num_tokens
assert num_tokens_padded <= self.max_num_tokens
model_kwargs = self._init_model_kwargs(num_tokens_padded)
model_kwargs = self._init_model_kwargs()
if self.supports_mm_inputs and not self.model_config.is_encoder_decoder:
input_ids, inputs_embeds = self._prepare_mm_inputs(num_tokens_padded)
......@@ -4304,7 +4304,7 @@ class GPUModelRunner(
elif self.enable_prompt_embeds:
input_ids = None
inputs_embeds = self.inputs_embeds.gpu[:num_tokens_padded]
model_kwargs = self._init_model_kwargs(num_tokens_padded)
model_kwargs = self._init_model_kwargs()
else:
input_ids = self.input_ids.gpu[:num_tokens_padded]
inputs_embeds = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment