Unverified commit d2b2eed6, authored by Zhuohan Li and committed by GitHub
Browse files

[Fix] Fix a condition for ignored sequences (#867)

parent 4b6f069b
...@@ -64,6 +64,9 @@ class Scheduler: ...@@ -64,6 +64,9 @@ class Scheduler:
self.scheduler_config = scheduler_config self.scheduler_config = scheduler_config
self.cache_config = cache_config self.cache_config = cache_config
self.prompt_limit = min(self.scheduler_config.max_model_len,
self.scheduler_config.max_num_batched_tokens)
# Instantiate the scheduling policy. # Instantiate the scheduling policy.
self.policy = PolicyFactory.get_policy(policy_name="fcfs") self.policy = PolicyFactory.get_policy(policy_name="fcfs")
# Create the block space manager. # Create the block space manager.
...@@ -123,18 +126,15 @@ class Scheduler: ...@@ -123,18 +126,15 @@ class Scheduler:
seq_group = self.waiting[0] seq_group = self.waiting[0]
num_prompt_tokens = seq_group.get_seqs()[0].get_len() num_prompt_tokens = seq_group.get_seqs()[0].get_len()
prompt_limit = min( if num_prompt_tokens > self.prompt_limit:
self.scheduler_config.max_model_len,
self.scheduler_config.max_num_batched_tokens)
if num_prompt_tokens > prompt_limit:
logger.warning( logger.warning(
f"Input prompt ({num_prompt_tokens} tokens) is too long" f"Input prompt ({num_prompt_tokens} tokens) is too long"
f" and exceeds limit of {prompt_limit}") f" and exceeds limit of {self.prompt_limit}")
for seq in seq_group.get_seqs(): for seq in seq_group.get_seqs():
seq.status = SequenceStatus.FINISHED_IGNORED seq.status = SequenceStatus.FINISHED_IGNORED
ignored_seq_groups.append(seq_group) ignored_seq_groups.append(seq_group)
self.waiting.pop(0) self.waiting.pop(0)
break continue
# If the sequence group cannot be allocated, stop. # If the sequence group cannot be allocated, stop.
if not self.block_manager.can_allocate(seq_group): if not self.block_manager.can_allocate(seq_group):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment