Unverified commit baa24025, authored by Cade Daniel and committed by GitHub
Browse files

[Core] Fix edge case in chunked prefill + block manager v2 (#7380)

parent 999ef0b9
...@@ -261,11 +261,22 @@ def test_lookahead_greedy_equality_with_preemption(baseline_llm_generator, ...@@ -261,11 +261,22 @@ def test_lookahead_greedy_equality_with_preemption(baseline_llm_generator,
# skip cuda graph creation for fast test. # skip cuda graph creation for fast test.
"enforce_eager": True, "enforce_eager": True,
"enable_chunked_prefill": True, "enable_chunked_prefill": True,
"max_num_batched_tokens": 2,
"max_num_seqs": 2,
}, },
]) ])
@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) @pytest.mark.parametrize("per_test_common_llm_kwargs",
[{
"block_size": 8,
"max_num_batched_tokens": 2,
"max_num_seqs": 2,
}, {
"block_size": 8,
"max_num_batched_tokens": 3,
"max_num_seqs": 2,
}, {
"block_size": 8,
"max_num_batched_tokens": 256,
"max_num_seqs": 10,
}])
@pytest.mark.parametrize("baseline_llm_kwargs", [ @pytest.mark.parametrize("baseline_llm_kwargs", [
{ {
"use_v2_block_manager": False, "use_v2_block_manager": False,
...@@ -294,6 +305,7 @@ def test_chunked_prefill_block_manager_v2(baseline_llm_generator, ...@@ -294,6 +305,7 @@ def test_chunked_prefill_block_manager_v2(baseline_llm_generator,
prompts = [ prompts = [
"Hello, my name is", "Hello, my name is",
"The president of the United States is", "The president of the United States is",
("1 + " * 50) + " 1 = ", # Longer prompt.
"The capital of France is", "The capital of France is",
"The future of AI is", "The future of AI is",
] ]
......
...@@ -356,7 +356,13 @@ class BlockTable: ...@@ -356,7 +356,13 @@ class BlockTable:
appended to blocks. The first such "token block" may have less token ids appended to blocks. The first such "token block" may have less token ids
than the block size, since the last allocated block may be partially than the block size, since the last allocated block may be partially
full. full.
If no token ids are provided, then no chunks are returned.
""" """
if not token_ids:
return []
first_chunk_size = self._block_size - (self._num_full_slots % first_chunk_size = self._block_size - (self._num_full_slots %
self._block_size) self._block_size)
token_blocks = [token_ids[:first_chunk_size]] token_blocks = [token_ids[:first_chunk_size]]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment