Commit fc67613a authored by zhuwenwen
Browse files

Merge tag 'v0.19.1' into v0.19.0

parents 31aec25b b1388b1f
...@@ -565,11 +565,17 @@ class SlidingWindowManager(SingleTypeKVCacheManager): ...@@ -565,11 +565,17 @@ class SlidingWindowManager(SingleTypeKVCacheManager):
for computed in computed_blocks: for computed in computed_blocks:
computed.pop() computed.pop()
if use_eagle and computed_blocks[0]: if use_eagle and computed_blocks[0]:
assert kv_cache_spec.block_size == alignment_tokens, (
"aligned_length is not compatible with eagle now"
)
for computed in computed_blocks: for computed in computed_blocks:
computed.pop() computed.pop()
# Re-align after eagle pop: the pop may break the alignment
# when block_size != alignment_tokens (hybrid models with
# different page sizes, e.g. Gemma4).
while (
block_size != alignment_tokens
and len(computed_blocks[0]) * block_size % alignment_tokens != 0
):
for computed in computed_blocks:
computed.pop()
return computed_blocks return computed_blocks
def get_num_skipped_tokens(self, num_computed_tokens: int) -> int: def get_num_skipped_tokens(self, num_computed_tokens: int) -> int:
......
...@@ -1282,6 +1282,7 @@ class SpecDecodeBaseProposer: ...@@ -1282,6 +1282,7 @@ class SpecDecodeBaseProposer:
"Qwen2_5_VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration",
"Qwen3VLForConditionalGeneration", "Qwen3VLForConditionalGeneration",
"Qwen3VLMoeForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration",
"Gemma4ForConditionalGeneration",
"HunYuanVLForConditionalGeneration", "HunYuanVLForConditionalGeneration",
"GlmOcrForConditionalGeneration", "GlmOcrForConditionalGeneration",
"Qwen3_5ForConditionalGeneration", "Qwen3_5ForConditionalGeneration",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment