Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b4bb5f31
Unverified
Commit
b4bb5f31
authored
Dec 31, 2025
by
maang-h
Committed by
GitHub
Dec 30, 2025
Browse files
[Core] Remove unused `num_tokens` parameter from `_init_model_kwargs` (#31517)
Signed-off-by:
maang
<
maang_h@163.com
>
parent
70e1acef
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
6 deletions
+6
-6
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+6
-6
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
b4bb5f31
...
...
@@ -684,7 +684,7 @@ class GPUModelRunner(
with_numpy
=
numpy
,
)
def
_init_model_kwargs
(
self
,
num_tokens
:
int
):
def
_init_model_kwargs
(
self
):
model_kwargs
=
dict
[
str
,
Any
]()
if
not
self
.
is_pooling_model
:
...
...
@@ -2579,7 +2579,7 @@ class GPUModelRunner(
input_ids
,
inputs_embeds
=
self
.
_prepare_mm_inputs
(
num_input_tokens
)
model_kwargs
=
{
**
self
.
_init_model_kwargs
(
num_scheduled_tokens
),
**
self
.
_init_model_kwargs
(),
**
self
.
_extract_mm_kwargs
(
scheduler_output
),
}
elif
self
.
enable_prompt_embeds
and
is_first_rank
:
...
...
@@ -2607,7 +2607,7 @@ class GPUModelRunner(
self
.
inputs_embeds
.
gpu
[
token_ids_idx
]
=
tokens_to_embeds
inputs_embeds
=
self
.
inputs_embeds
.
gpu
[:
num_input_tokens
]
model_kwargs
=
self
.
_init_model_kwargs
(
num_input_tokens
)
model_kwargs
=
self
.
_init_model_kwargs
()
input_ids
=
None
else
:
# For text-only models, we use token ids as input.
...
...
@@ -2616,7 +2616,7 @@ class GPUModelRunner(
# then the embedding layer is not included in the CUDA graph.
input_ids
=
self
.
input_ids
.
gpu
[:
num_input_tokens
]
inputs_embeds
=
None
model_kwargs
=
self
.
_init_model_kwargs
(
num_input_tokens
)
model_kwargs
=
self
.
_init_model_kwargs
()
if
self
.
uses_mrope
:
positions
=
self
.
mrope_positions
.
gpu
[:,
:
num_input_tokens
]
...
...
@@ -4293,7 +4293,7 @@ class GPUModelRunner(
):
# Make sure padding doesn't exceed max_num_tokens
assert
num_tokens_padded
<=
self
.
max_num_tokens
model_kwargs
=
self
.
_init_model_kwargs
(
num_tokens_padded
)
model_kwargs
=
self
.
_init_model_kwargs
()
if
self
.
supports_mm_inputs
and
not
self
.
model_config
.
is_encoder_decoder
:
input_ids
,
inputs_embeds
=
self
.
_prepare_mm_inputs
(
num_tokens_padded
)
...
...
@@ -4304,7 +4304,7 @@ class GPUModelRunner(
elif
self
.
enable_prompt_embeds
:
input_ids
=
None
inputs_embeds
=
self
.
inputs_embeds
.
gpu
[:
num_tokens_padded
]
model_kwargs
=
self
.
_init_model_kwargs
(
num_tokens_padded
)
model_kwargs
=
self
.
_init_model_kwargs
()
else
:
input_ids
=
self
.
input_ids
.
gpu
[:
num_tokens_padded
]
inputs_embeds
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment