Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
d2b2eed6
Unverified
Commit
d2b2eed6
authored
Aug 27, 2023
by
Zhuohan Li
Committed by
GitHub
Aug 27, 2023
Browse files
[Fix] Fix a condition for ignored sequences (#867)
parent
4b6f069b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
6 deletions
+6
-6
vllm/core/scheduler.py
vllm/core/scheduler.py
+6
-6
No files found.
vllm/core/scheduler.py
View file @
d2b2eed6
...
@@ -64,6 +64,9 @@ class Scheduler:
...
@@ -64,6 +64,9 @@ class Scheduler:
self
.
scheduler_config
=
scheduler_config
self
.
scheduler_config
=
scheduler_config
self
.
cache_config
=
cache_config
self
.
cache_config
=
cache_config
self
.
prompt_limit
=
min
(
self
.
scheduler_config
.
max_model_len
,
self
.
scheduler_config
.
max_num_batched_tokens
)
# Instantiate the scheduling policy.
# Instantiate the scheduling policy.
self
.
policy
=
PolicyFactory
.
get_policy
(
policy_name
=
"fcfs"
)
self
.
policy
=
PolicyFactory
.
get_policy
(
policy_name
=
"fcfs"
)
# Create the block space manager.
# Create the block space manager.
...
@@ -123,18 +126,15 @@ class Scheduler:
...
@@ -123,18 +126,15 @@ class Scheduler:
seq_group
=
self
.
waiting
[
0
]
seq_group
=
self
.
waiting
[
0
]
num_prompt_tokens
=
seq_group
.
get_seqs
()[
0
].
get_len
()
num_prompt_tokens
=
seq_group
.
get_seqs
()[
0
].
get_len
()
prompt_limit
=
min
(
if
num_prompt_tokens
>
self
.
prompt_limit
:
self
.
scheduler_config
.
max_model_len
,
self
.
scheduler_config
.
max_num_batched_tokens
)
if
num_prompt_tokens
>
prompt_limit
:
logger
.
warning
(
logger
.
warning
(
f
"Input prompt (
{
num_prompt_tokens
}
tokens) is too long"
f
"Input prompt (
{
num_prompt_tokens
}
tokens) is too long"
f
" and exceeds limit of
{
prompt_limit
}
"
)
f
" and exceeds limit of
{
self
.
prompt_limit
}
"
)
for
seq
in
seq_group
.
get_seqs
():
for
seq
in
seq_group
.
get_seqs
():
seq
.
status
=
SequenceStatus
.
FINISHED_IGNORED
seq
.
status
=
SequenceStatus
.
FINISHED_IGNORED
ignored_seq_groups
.
append
(
seq_group
)
ignored_seq_groups
.
append
(
seq_group
)
self
.
waiting
.
pop
(
0
)
self
.
waiting
.
pop
(
0
)
break
continue
# If the sequence group cannot be allocated, stop.
# If the sequence group cannot be allocated, stop.
if
not
self
.
block_manager
.
can_allocate
(
seq_group
):
if
not
self
.
block_manager
.
can_allocate
(
seq_group
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment