Unverified commit 20c81199, authored by Ke Bao, committed by GitHub
Browse files

Fix eagle hang issue for max_new_tokens=1 (#4185)

parent 70866b6f
...@@ -957,7 +957,11 @@ class Scheduler: ...@@ -957,7 +957,11 @@ class Scheduler:
self.req_to_token_pool.free(self.chunked_req.req_pool_idx) self.req_to_token_pool.free(self.chunked_req.req_pool_idx)
self.batch_is_full = False self.batch_is_full = False
last_bs = self.last_batch.batch_size()
self.last_batch.filter_batch() self.last_batch.filter_batch()
if self.last_batch.batch_size() < last_bs:
self.batch_is_full = False
if not self.last_batch.is_empty(): if not self.last_batch.is_empty():
if self.running_batch is None: if self.running_batch is None:
self.running_batch = self.last_batch self.running_batch = self.last_batch
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment