Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8b8a8afc
Unverified
Commit
8b8a8afc
authored
Sep 23, 2025
by
Wentao Ye
Committed by
GitHub
Sep 24, 2025
Browse files
[CI] Fix Pre-commit Issue (#25497)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
8bdd8b5c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
2 deletions
+7
-2
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+7
-2
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
8b8a8afc
...
@@ -2367,7 +2367,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
...
@@ -2367,7 +2367,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
sampling_metadata
:
SamplingMetadata
,
sampling_metadata
:
SamplingMetadata
,
hidden_states
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
sample_hidden_states
:
torch
.
Tensor
,
sample_hidden_states
:
torch
.
Tensor
,
aux_hidden_states
:
Optional
[
torch
.
Tensor
],
aux_hidden_states
:
Optional
[
list
[
torch
.
Tensor
]
]
,
spec_decode_metadata
:
Optional
[
SpecDecodeMetadata
],
spec_decode_metadata
:
Optional
[
SpecDecodeMetadata
],
common_attn_metadata
:
CommonAttentionMetadata
,
common_attn_metadata
:
CommonAttentionMetadata
,
)
->
Union
[
list
[
list
[
int
]],
torch
.
Tensor
]:
)
->
Union
[
list
[
list
[
int
]],
torch
.
Tensor
]:
...
@@ -2387,6 +2387,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
...
@@ -2387,6 +2387,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
else
:
else
:
indices
=
[]
indices
=
[]
offset
=
0
offset
=
0
assert
spec_decode_metadata
is
not
None
for
num_draft
,
tokens
in
zip
(
for
num_draft
,
tokens
in
zip
(
spec_decode_metadata
.
num_draft_tokens
,
spec_decode_metadata
.
num_draft_tokens
,
sampled_token_ids
):
sampled_token_ids
):
...
@@ -2437,6 +2438,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
...
@@ -2437,6 +2438,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
# TODO(woosuk): Support M-RoPE.
# TODO(woosuk): Support M-RoPE.
target_positions
=
self
.
positions
.
gpu
[:
num_scheduled_tokens
]
target_positions
=
self
.
positions
.
gpu
[:
num_scheduled_tokens
]
if
self
.
use_aux_hidden_state_outputs
:
if
self
.
use_aux_hidden_state_outputs
:
assert
aux_hidden_states
is
not
None
target_hidden_states
=
torch
.
cat
(
target_hidden_states
=
torch
.
cat
(
[
h
[:
num_scheduled_tokens
]
for
h
in
aux_hidden_states
],
[
h
[:
num_scheduled_tokens
]
for
h
in
aux_hidden_states
],
dim
=-
1
)
dim
=-
1
)
...
@@ -2462,6 +2464,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
...
@@ -2462,6 +2464,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
# TODO(woosuk): Support M-RoPE.
# TODO(woosuk): Support M-RoPE.
target_positions
=
self
.
positions
.
gpu
[
token_indices
]
target_positions
=
self
.
positions
.
gpu
[
token_indices
]
if
self
.
use_aux_hidden_state_outputs
:
if
self
.
use_aux_hidden_state_outputs
:
assert
aux_hidden_states
is
not
None
target_hidden_states
=
torch
.
cat
(
target_hidden_states
=
torch
.
cat
(
[
h
[
token_indices
]
for
h
in
aux_hidden_states
],
dim
=-
1
)
[
h
[
token_indices
]
for
h
in
aux_hidden_states
],
dim
=-
1
)
else
:
else
:
...
@@ -2897,7 +2900,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
...
@@ -2897,7 +2900,9 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
assert
not
create_mixed_batch
assert
not
create_mixed_batch
num_reqs
=
cdiv
(
num_tokens
,
max_query_len
)
num_reqs
=
cdiv
(
num_tokens
,
max_query_len
)
assert
num_reqs
<=
max_num_reqs
,
\
assert
num_reqs
<=
max_num_reqs
,
\
"Do not capture num_reqs > max_num_reqs for uniform batch"
f
"Do not capture num_reqs
{
num_reqs
}
> max_num_reqs "
\
f
"
{
max_num_reqs
}
for uniform batch. Num tokens: "
\
f
"
{
num_tokens
}
, max_query_len:
{
max_query_len
}
"
num_scheduled_tokens_list
=
[
max_query_len
]
*
num_reqs
num_scheduled_tokens_list
=
[
max_query_len
]
*
num_reqs
if
num_tokens
%
max_query_len
!=
0
:
if
num_tokens
%
max_query_len
!=
0
:
num_scheduled_tokens_list
[
-
1
]
=
num_tokens
%
max_query_len
num_scheduled_tokens_list
[
-
1
]
=
num_tokens
%
max_query_len
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment