[BugFix][Core] Fix BlockManagerV2 when Encoder Input is None (#9103)

c8f26bb6 · sroy745 · GitHub · 487678d0 · c8f26bb6 · c8f26bb6
Unverified Commit c8f26bb6 authored Oct 06, 2024 by sroy745 Committed by GitHub Oct 07, 2024
Showing 3 changed files with 3 additions and 8 deletions

vllm/core/block/block_table.py +0 -2

vllm/core/block_manager_v2.py +3 -1

vllm/engine/arg_utils.py +0 -5

No files found.
--- a/vllm/core/block/block_table.py
+++ b/vllm/core/block/block_table.py
@@ -220,7 +220,6 @@ class BlockTable:
        occupied by each block. After freeing all the blocks, the `_blocks` list
        is set to `None`.
        """
-        assert self._is_allocated
        for block in self.blocks:
            self._allocator.free(block)
        self._blocks.reset()
@@ -239,7 +238,6 @@ class BlockTable:
            List[int]: A list of physical block indices for the blocks in the
                BlockTable.
        """
-        assert self._is_allocated
        return self._blocks.ids()
    def get_unseen_token_ids(self, sequence_token_ids: List[int]) -> List[int]:

--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -151,6 +151,8 @@ class BlockSpaceManagerV2(BlockSpaceManager):
            block_allocator=self.block_allocator,
            max_block_sliding_window=self.max_block_sliding_window,
        )
-        block_table.allocate(seq.get_token_ids())
+        if seq.get_token_ids():
+            # Add blocks to the block table only if the sequence is non empty.
+            block_table.allocate(seq.get_token_ids())
        return block_table

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -903,11 +903,6 @@ class EngineArgs:
                    "--enable-prefix-caching is currently not "
                    "supported for multimodal models and has been disabled.")
            self.enable_prefix_caching = False
-        if model_config.is_encoder_decoder_model:
-            logger.warning(
-                "Block Manager v2 does not support encoder-decoder models"
-                " currently. Using Block Manager v1 as fallback.")
-            self.use_v2_block_manager = False
        cache_config = CacheConfig(
            block_size=self.block_size if self.device != "neuron" else