Unverified commit 8fae5ed7, authored by Woo-Yeon Lee, committed by GitHub
Browse files

[Misc] Fix minor typo in scheduler (#8765)

parent 3368c3ab
...@@ -1554,14 +1554,14 @@ class Scheduler: ...@@ -1554,14 +1554,14 @@ class Scheduler:
# the number of new tokens that is dividable by the block size # the number of new tokens that is dividable by the block size
# to avoid partial block matching. # to avoid partial block matching.
block_size = self.cache_config.block_size block_size = self.cache_config.block_size
reminder = budget.token_budget % block_size remainder = budget.token_budget % block_size
if reminder != 0: if remainder != 0:
raise ValueError("When enabling chunked prefill and " raise ValueError("When enabling chunked prefill and "
"prefix caching, max_num_batched_tokens " "prefix caching, max_num_batched_tokens "
"(chunk size) must be dividable by " "(chunk size) must be dividable by "
"block size, but got chunk_size " "block size, but got chunk_size "
f"({budget.token_budget}) % block_size " f"({budget.token_budget}) % block_size "
f"({block_size}) = {reminder}") f"({block_size}) = {remainder}")
if remaining_token_budget < num_new_tokens: if remaining_token_budget < num_new_tokens:
num_new_tokens = (remaining_token_budget // num_new_tokens = (remaining_token_budget //
block_size) * block_size block_size) * block_size
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment