Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad8d696a
Unverified
Commit
ad8d696a
authored
Apr 23, 2024
by
SangBin Cho
Committed by
GitHub
Apr 22, 2024
Browse files
[Core] Scheduler perf fix (#4270)
parent
3d925165
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
14 deletions
+11
-14
tests/core/test_scheduler.py
tests/core/test_scheduler.py
+9
-9
vllm/core/scheduler.py
vllm/core/scheduler.py
+2
-5
No files found.
tests/core/test_scheduler.py
View file @
ad8d696a
...
...
@@ -540,7 +540,7 @@ def test_decode_schedule_preempted():
curr_loras
=
None
for
i
in
range
(
3
):
_
,
seq_group
=
create_dummy_prompt
(
str
(
i
),
prompt_length
=
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
running
.
append
(
seq_group
)
scheduler
.
block_manager
.
can_append_slots
=
MagicMock
()
...
...
@@ -581,7 +581,7 @@ def test_decode_swap_beam_search():
budget
=
create_token_budget
()
for
i
in
range
(
3
):
_
,
seq_group
=
create_dummy_prompt
(
str
(
i
),
prompt_length
=
60
,
best_of
=
2
)
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
running
.
append
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
budget
.
add_num_seqs
(
seq_group
.
request_id
,
...
...
@@ -629,7 +629,7 @@ def test_schedule_decode_blocks_to_copy_update():
running
=
deque
()
policy
=
PolicyFactory
.
get_policy
(
policy_name
=
"fcfs"
)
curr_loras
=
None
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
running
.
append
(
seq_group
)
...
...
@@ -659,7 +659,7 @@ def test_schedule_swapped_simple():
curr_loras
=
None
blocks_to_swap_out
=
{}
_
,
seq_group
=
create_dummy_prompt
(
"1"
,
prompt_length
=
60
,
best_of
=
2
)
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
scheduler
.
_swap_out
(
seq_group
,
blocks_to_swap_out
)
swapped
.
append
(
seq_group
)
...
...
@@ -687,7 +687,7 @@ def test_schedule_swapped_max_token_budget():
blocks_to_swap_out
=
{}
for
_
in
range
(
2
):
_
,
seq_group
=
create_dummy_prompt
(
"1"
,
prompt_length
=
60
,
best_of
=
2
)
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
scheduler
.
_swap_out
(
seq_group
,
blocks_to_swap_out
)
swapped
.
append
(
seq_group
)
...
...
@@ -721,7 +721,7 @@ def test_schedule_swapped_max_seqs():
blocks_to_swap_out
=
{}
for
i
in
range
(
4
):
_
,
seq_group
=
create_dummy_prompt
(
str
(
i
),
prompt_length
=
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
scheduler
.
_swap_out
(
seq_group
,
blocks_to_swap_out
)
swapped
.
append
(
seq_group
)
...
...
@@ -759,7 +759,7 @@ def test_schedule_swapped_max_loras():
lora_name
=
str
(
i
),
lora_int_id
=
i
+
1
,
lora_local_path
=
"abc"
))
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
scheduler
.
_swap_out
(
seq_group
,
blocks_to_swap_out
)
swapped
.
append
(
seq_group
)
...
...
@@ -783,7 +783,7 @@ def test_schedule_swapped_cannot_swap_in():
blocks_to_swap_out
=
{}
for
_
in
range
(
2
):
_
,
seq_group
=
create_dummy_prompt
(
"1"
,
prompt_length
=
60
,
best_of
=
2
)
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
scheduler
.
_swap_out
(
seq_group
,
blocks_to_swap_out
)
swapped
.
append
(
seq_group
)
...
...
@@ -808,7 +808,7 @@ def test_schedule_swapped_blocks_to_copy():
policy
=
PolicyFactory
.
get_policy
(
policy_name
=
"fcfs"
)
curr_loras
=
None
_
,
seq_group
=
create_dummy_prompt
(
"1"
,
prompt_length
=
60
,
best_of
=
2
)
scheduler
.
_allocate_and_set_running
(
seq_group
,
60
)
scheduler
.
_allocate_and_set_running
(
seq_group
)
append_new_token_seq_group
(
60
,
seq_group
,
1
)
blocks_to_swap_out
=
{}
scheduler
.
_swap_out
(
seq_group
,
blocks_to_swap_out
)
...
...
vllm/core/scheduler.py
View file @
ad8d696a
...
...
@@ -297,7 +297,6 @@ class Scheduler:
def
add_seq_group
(
self
,
seq_group
:
SequenceGroup
)
->
None
:
# Add sequence groups to the waiting queue.
logger
.
debug
(
f
"add_seq_group
{
seq_group
.
request_id
}
"
)
self
.
waiting
.
append
(
seq_group
)
def
abort_seq_group
(
self
,
request_id
:
Union
[
str
,
Iterable
[
str
]])
->
None
:
...
...
@@ -427,7 +426,6 @@ class Scheduler:
swapped_out
.
append
(
seq_group
)
break
else
:
logger
.
debug
(
f
"append slot for
{
seq_group
}
"
)
self
.
_append_slots
(
seq_group
,
blocks_to_copy
)
is_prefill
=
seq_group
.
is_prefill
()
if
is_prefill
:
...
...
@@ -659,7 +657,7 @@ class Scheduler:
if
curr_loras
is
not
None
and
lora_int_id
>
0
:
curr_loras
.
add
(
lora_int_id
)
waiting_queue
.
popleft
()
self
.
_allocate_and_set_running
(
seq_group
,
num_new_tokens
)
self
.
_allocate_and_set_running
(
seq_group
)
seq_groups
.
append
(
ScheduledSequenceGroup
(
seq_group
=
seq_group
,
token_chunk_size
=
num_new_tokens
))
...
...
@@ -952,8 +950,7 @@ class Scheduler:
self
.
running
=
deque
(
seq_group
for
seq_group
in
self
.
running
if
not
seq_group
.
is_finished
())
def
_allocate_and_set_running
(
self
,
seq_group
:
SequenceGroup
,
num_new_tokens
:
int
)
->
None
:
def
_allocate_and_set_running
(
self
,
seq_group
:
SequenceGroup
)
->
None
:
self
.
block_manager
.
allocate
(
seq_group
)
for
seq
in
seq_group
.
get_seqs
(
status
=
SequenceStatus
.
WAITING
):
seq
.
status
=
SequenceStatus
.
RUNNING
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment