Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
067da2d1
Unverified
Commit
067da2d1
authored
Oct 07, 2025
by
Nick Hill
Committed by
GitHub
Oct 08, 2025
Browse files
[Core] Simplify setting new_token_ids in CachedRequestData (#26388)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
046118b9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
1 addition
and
7 deletions
+1
-7
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/scheduler.py
+1
-7
No files found.
vllm/v1/core/sched/scheduler.py
View file @
067da2d1
...
@@ -712,7 +712,6 @@ class Scheduler(SchedulerInterface):
...
@@ -712,7 +712,6 @@ class Scheduler(SchedulerInterface):
num_computed_tokens
:
list
[
int
]
=
[]
num_computed_tokens
:
list
[
int
]
=
[]
num_output_tokens
:
list
[
int
]
=
[]
num_output_tokens
:
list
[
int
]
=
[]
use_connector
=
self
.
connector
is
not
None
for
req
in
itertools
.
chain
(
running_reqs
,
resumed_reqs
):
for
req
in
itertools
.
chain
(
running_reqs
,
resumed_reqs
):
req_id
=
req
.
request_id
req_id
=
req
.
request_id
req_ids
.
append
(
req_id
)
req_ids
.
append
(
req_id
)
...
@@ -729,16 +728,11 @@ class Scheduler(SchedulerInterface):
...
@@ -729,16 +728,11 @@ class Scheduler(SchedulerInterface):
req
.
num_computed_tokens
:
req
.
num_computed_tokens
+
num_tokens
req
.
num_computed_tokens
:
req
.
num_computed_tokens
+
num_tokens
]
]
new_token_ids
.
append
(
token_ids
)
new_token_ids
.
append
(
token_ids
)
elif
use_connector
:
# When using a KVConnector, we add a placeholder to avoid index
# out of bounds errors. TODO: Remove this once the KVConnector
# is updated to handle token IDs properly.
new_token_ids
.
append
([])
new_block_ids
.
append
(
new_block_ids
.
append
(
req_to_new_blocks
[
req_id
].
get_block_ids
(
allow_none
=
True
)
req_to_new_blocks
[
req_id
].
get_block_ids
(
allow_none
=
True
)
)
)
num_computed_tokens
.
append
(
req
.
num_computed_tokens
)
num_computed_tokens
.
append
(
req
.
num_computed_tokens
)
num_output_tokens
.
append
(
len
(
req
.
output_token
_id
s
)
)
num_output_tokens
.
append
(
req
.
num_
output_tokens
)
# Because resumed_reqs is usually empty, it is more efficient to do
# Because resumed_reqs is usually empty, it is more efficient to do
# in-place appending so that we don't need to allocate a new list.
# in-place appending so that we don't need to allocate a new list.
resumed_from_preemption
=
[
False
]
*
len
(
running_reqs
)
resumed_from_preemption
=
[
False
]
*
len
(
running_reqs
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment