Unverified commit c8f26bb6, authored by sroy745 and committed by GitHub
Browse files

[BugFix][Core] Fix BlockManagerV2 when Encoder Input is None (#9103)

parent 487678d0
@@ -220,7 +220,6 @@ class BlockTable:
 occupied by each block. After freeing all the blocks, the `_blocks` list
 is set to `None`.
 """
-assert self._is_allocated
 for block in self.blocks:
 self._allocator.free(block)
 self._blocks.reset()
@@ -239,7 +238,6 @@ class BlockTable:
 List[int]: A list of physical block indices for the blocks in the
 BlockTable.
 """
-assert self._is_allocated
 return self._blocks.ids()
 def get_unseen_token_ids(self, sequence_token_ids: List[int]) -> List[int]:
......
@@ -151,6 +151,8 @@ class BlockSpaceManagerV2(BlockSpaceManager):
 block_allocator=self.block_allocator,
 max_block_sliding_window=self.max_block_sliding_window,
 )
+if seq.get_token_ids():
+# Add blocks to the block table only if the sequence is non empty.
 block_table.allocate(seq.get_token_ids())
 return block_table
......
@@ -903,11 +903,6 @@ class EngineArgs:
 "--enable-prefix-caching is currently not "
 "supported for multimodal models and has been disabled.")
 self.enable_prefix_caching = False
-if model_config.is_encoder_decoder_model:
-logger.warning(
-"Block Manager v2 does not support encoder-decoder models"
-" currently. Using Block Manager v1 as fallback.")
-self.use_v2_block_manager = False
 cache_config = CacheConfig(
 block_size=self.block_size if self.device != "neuron" else
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment