Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6ca8031e
Unverified
Commit
6ca8031e
authored
Jul 30, 2024
by
youkaichao
Committed by
GitHub
Jul 30, 2024
Browse files
[core][misc] improve free_finished_seq_groups (#6865)
Co-authored-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
d7a299ed
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
7 deletions
+12
-7
vllm/core/scheduler.py
vllm/core/scheduler.py
+12
-7
No files found.
vllm/core/scheduler.py
View file @
6ca8031e
...
...
@@ -313,6 +313,7 @@ class Scheduler:
# Sequence groups finished requests ids since last step iteration.
# It lets the model know that any state associated with these requests
# can and must be released after the current step.
# This is used to evict the finished requests from the Mamba cache.
self
.
_finished_requests_ids
:
List
[
str
]
=
list
()
# Time at previous scheduling step
self
.
prev_time
=
0.0
...
...
@@ -374,6 +375,7 @@ class Scheduler:
for
aborted_group
in
aborted_groups
:
# Remove the sequence group from the state queue.
state_queue
.
remove
(
aborted_group
)
# Remove the aborted request from the Mamba cache.
self
.
_finished_requests_ids
.
append
(
aborted_group
.
request_id
)
for
seq
in
aborted_group
.
get_seqs
():
if
seq
.
is_finished
():
...
...
@@ -1057,13 +1059,16 @@ class Scheduler:
self
.
block_manager
.
free
(
seq
)
def
free_finished_seq_groups
(
self
)
->
None
:
for
queue
in
[
self
.
running
,
self
.
swapped
,
self
.
waiting
]:
self
.
_finished_requests_ids
+=
[
seq_group
.
request_id
for
seq_group
in
queue
if
seq_group
.
is_finished
()
]
self
.
running
=
deque
(
seq_group
for
seq_group
in
self
.
running
if
not
seq_group
.
is_finished
())
remaining
:
Deque
[
SequenceGroup
]
=
deque
()
for
seq_group
in
self
.
running
:
if
seq_group
.
is_finished
():
# Add the finished requests to the finished requests list.
# This list will be used to update the Mamba cache in the
# next step.
self
.
_finished_requests_ids
.
append
(
seq_group
.
request_id
)
else
:
remaining
.
append
(
seq_group
)
self
.
running
=
remaining
def
_allocate_and_set_running
(
self
,
seq_group
:
SequenceGroup
)
->
None
:
self
.
block_manager
.
allocate
(
seq_group
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment