Merge tag 'v0.19.1' into v0.19.0

fc67613a · zhuwenwen · 31aec25b · b1388b1f · fc67613a · fc67613a
Commit fc67613a authored Apr 18, 2026 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 10 additions and 3 deletions

vllm/v1/core/single_type_kv_cache_manager.py +9 -3

vllm/v1/spec_decode/eagle.py +1 -0

No files found.
--- a/vllm/v1/core/single_type_kv_cache_manager.py
+++ b/vllm/v1/core/single_type_kv_cache_manager.py
@@ -565,11 +565,17 @@ class SlidingWindowManager(SingleTypeKVCacheManager):
                for computed in computed_blocks:
                    computed.pop()
        if use_eagle and computed_blocks[0]:
-            assert kv_cache_spec.block_size == alignment_tokens, (
-                "aligned_length is not compatible with eagle now"
-            )
            for computed in computed_blocks:
                computed.pop()
+            # Re-align after eagle pop: the pop may break the alignment
+            # when block_size != alignment_tokens (hybrid models with
+            # different page sizes, e.g. Gemma4).
+            while (
+                block_size != alignment_tokens
+                and len(computed_blocks[0]) * block_size % alignment_tokens != 0
+            ):
+                for computed in computed_blocks:
+                    computed.pop()
        return computed_blocks
    def get_num_skipped_tokens(self, num_computed_tokens: int) -> int:

--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -1282,6 +1282,7 @@ class SpecDecodeBaseProposer:
                "Qwen2_5_VLForConditionalGeneration",
                "Qwen3VLForConditionalGeneration",
                "Qwen3VLMoeForConditionalGeneration",
+                "Gemma4ForConditionalGeneration",
                "HunYuanVLForConditionalGeneration",
                "GlmOcrForConditionalGeneration",
                "Qwen3_5ForConditionalGeneration",