Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cb226321
Unverified
Commit
cb226321
authored
Mar 26, 2026
by
Chuan (Richard) Li
Committed by
GitHub
Mar 26, 2026
Browse files
[Bugfix][Minor] Fix potential NameError in mamba backend selector and misc typos (#35886)
Signed-off-by: Li <chuali@amd.com>
parent
e054f152
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing 4 changed files with 8 additions and 5 deletions
+8
-5
vllm/model_executor/models/kimi_k25.py
vllm/model_executor/models/kimi_k25.py
+1
-1
vllm/v1/attention/backends/flex_attention.py
vllm/v1/attention/backends/flex_attention.py
+4
-1
vllm/v1/attention/backends/utils.py
vllm/v1/attention/backends/utils.py
+1
-1
vllm/v1/attention/selector.py
vllm/v1/attention/selector.py
+2
-2
No files found.
vllm/model_executor/models/kimi_k25.py
View file @
cb226321
...
@@ -369,7 +369,7 @@ class KimiK25ForConditionalGeneration(
...
@@ -369,7 +369,7 @@ class KimiK25ForConditionalGeneration(
target_dtype
=
next
(
self
.
vision_tower
.
parameters
()).
dtype
target_dtype
=
next
(
self
.
vision_tower
.
parameters
()).
dtype
pixel_values
=
pixel_values
.
to
(
target_dtype
)
pixel_values
=
pixel_values
.
to
(
target_dtype
)
assert
isinstance
(
grid_thws
,
torch
.
Tensor
),
(
assert
isinstance
(
grid_thws
,
torch
.
Tensor
),
(
f
"expect grid_thws to be a tensor, g
e
t
{
type
(
grid_thws
)
}
"
f
"expect grid_thws to be a tensor, g
o
t
{
type
(
grid_thws
)
}
"
)
)
# In some cases (e.g. with merger), grid_thws has an extra middle dimension
# In some cases (e.g. with merger), grid_thws has an extra middle dimension
grid_thws
=
grid_thws
.
reshape
(
-
1
,
grid_thws
.
shape
[
-
1
])
grid_thws
=
grid_thws
.
reshape
(
-
1
,
grid_thws
.
shape
[
-
1
])
...
...
vllm/v1/attention/backends/flex_attention.py
View file @
cb226321
...
@@ -749,7 +749,10 @@ class FlexAttentionMetadataBuilder(AttentionMetadataBuilder[FlexAttentionMetadat
...
@@ -749,7 +749,10 @@ class FlexAttentionMetadataBuilder(AttentionMetadataBuilder[FlexAttentionMetadat
prefix_kv_lens
=
None
prefix_kv_lens
=
None
suffix_kv_lens
=
None
suffix_kv_lens
=
None
if
use_cascade
:
if
use_cascade
:
raise
NotImplementedError
(
"Not yet my friend"
)
raise
NotImplementedError
(
"Cascade prefix attention is not yet implemented "
"for FlexAttention backend"
)
block_size
=
self
.
kv_cache_spec
.
block_size
block_size
=
self
.
kv_cache_spec
.
block_size
max_possible_seq_len
=
self
.
model_config
.
max_model_len
max_possible_seq_len
=
self
.
model_config
.
max_model_len
...
...
vllm/v1/attention/backends/utils.py
View file @
cb226321
...
@@ -253,7 +253,7 @@ def make_local_attention_virtual_batches(
...
@@ -253,7 +253,7 @@ def make_local_attention_virtual_batches(
# seqlens_q_local = [2, 2, 1, 4, 4, 1, 4, 1]
# seqlens_q_local = [2, 2, 1, 4, 4, 1, 4, 1]
#
#
# First Get batched arange. (E.g., [2, 4, 2] -> [0, 1, 0, 1, 2, 3, 0, 1])
# First Get batched arange. (E.g., [2, 4, 2] -> [0, 1, 0, 1, 2, 3, 0, 1])
# (TODO: ma
x
a utility to share this code with _prepare_inputs)
# (TODO: ma
ke
a utility to share this code with _prepare_inputs)
# arange step 1. [2, 4, 2] -> [2, 6, 8]
# arange step 1. [2, 4, 2] -> [2, 6, 8]
cu_num_blocks
=
np
.
cumsum
(
local_blocks
)
cu_num_blocks
=
np
.
cumsum
(
local_blocks
)
virtual_batches
=
cu_num_blocks
[
-
1
]
virtual_batches
=
cu_num_blocks
[
-
1
]
...
...
vllm/v1/attention/selector.py
View file @
cb226321
...
@@ -149,8 +149,8 @@ def _cached_get_mamba_attn_backend(
...
@@ -149,8 +149,8 @@ def _cached_get_mamba_attn_backend(
selected_backend
=
MambaAttentionBackendEnum
[
backend_name
]
selected_backend
=
MambaAttentionBackendEnum
[
backend_name
]
except
KeyError
as
e
:
except
KeyError
as
e
:
raise
ValueError
(
raise
ValueError
(
f
"Invalid mamba attention backend type: '
{
backend_nam
e
}
'. Valid "
f
"Invalid mamba attention backend type: '
{
mamba_typ
e
}
'. Valid "
f
"
backend
s are:
{
list
(
M
ambaAttentionBackendEnum
.
__members__
.
keys
())
}
"
f
"
type
s are:
{
list
(
M
AMBA_TYPE_TO_BACKEND_MAP
.
keys
())
}
"
)
from
e
)
from
e
mamba_attn_backend
=
selected_backend
.
get_class
()
mamba_attn_backend
=
selected_backend
.
get_class
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment