Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c8f26bb6
Unverified
Commit
c8f26bb6
authored
Oct 06, 2024
by
sroy745
Committed by
GitHub
Oct 07, 2024
Browse files
[BugFix][Core] Fix BlockManagerV2 when Encoder Input is None (#9103)
parent
487678d0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
3 additions
and
8 deletions
+3
-8
vllm/core/block/block_table.py
vllm/core/block/block_table.py
+0
-2
vllm/core/block_manager_v2.py
vllm/core/block_manager_v2.py
+3
-1
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+0
-5
No files found.
vllm/core/block/block_table.py
View file @
c8f26bb6
...
...
@@ -220,7 +220,6 @@ class BlockTable:
occupied by each block. After freeing all the blocks, the `_blocks` list
is set to `None`.
"""
assert
self
.
_is_allocated
for
block
in
self
.
blocks
:
self
.
_allocator
.
free
(
block
)
self
.
_blocks
.
reset
()
...
...
@@ -239,7 +238,6 @@ class BlockTable:
List[int]: A list of physical block indices for the blocks in the
BlockTable.
"""
assert
self
.
_is_allocated
return
self
.
_blocks
.
ids
()
def
get_unseen_token_ids
(
self
,
sequence_token_ids
:
List
[
int
])
->
List
[
int
]:
...
...
vllm/core/block_manager_v2.py
View file @
c8f26bb6
...
...
@@ -151,7 +151,9 @@ class BlockSpaceManagerV2(BlockSpaceManager):
block_allocator
=
self
.
block_allocator
,
max_block_sliding_window
=
self
.
max_block_sliding_window
,
)
block_table
.
allocate
(
seq
.
get_token_ids
())
if
seq
.
get_token_ids
():
# Add blocks to the block table only if the sequence is non empty.
block_table
.
allocate
(
seq
.
get_token_ids
())
return
block_table
...
...
vllm/engine/arg_utils.py
View file @
c8f26bb6
...
...
@@ -903,11 +903,6 @@ class EngineArgs:
"--enable-prefix-caching is currently not "
"supported for multimodal models and has been disabled."
)
self
.
enable_prefix_caching
=
False
if
model_config
.
is_encoder_decoder_model
:
logger
.
warning
(
"Block Manager v2 does not support encoder-decoder models"
" currently. Using Block Manager v1 as fallback."
)
self
.
use_v2_block_manager
=
False
cache_config
=
CacheConfig
(
block_size
=
self
.
block_size
if
self
.
device
!=
"neuron"
else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment