Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
da1e7311
Unverified
Commit
da1e7311
authored
Apr 23, 2026
by
Isotr0py
Committed by
GitHub
Apr 23, 2026
Browse files
[Misc] use model arch converter for bidi models identification (#40701)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
01cb41dc
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
15 deletions
+28
-15
vllm/config/model.py
vllm/config/model.py
+2
-15
vllm/config/model_arch.py
vllm/config/model_arch.py
+3
-0
vllm/transformers_utils/model_arch_config_convertor.py
vllm/transformers_utils/model_arch_config_convertor.py
+23
-0
No files found.
vllm/config/model.py
View file @
da1e7311
...
...
@@ -1197,22 +1197,9 @@ class ModelConfig:
def is_deepseek_mla(self) -> bool:
    """Return the ``is_deepseek_mla`` flag stored on the model arch config."""
    arch_config = self.model_arch_config
    return arch_config.is_deepseek_mla
@property
def is_mm_prefix_lm(self) -> bool:
    """Whether to use bidirectional attention for mm positions.

    The decision is made once by the model arch config convertor and
    stored on ``model_arch_config``; this accessor only forwards it. The
    previous inline inspection of ``hf_config`` is intentionally gone so
    that architecture-specific convertors can override the result.
    """
    return self.model_arch_config.is_mm_prefix_lm
def get_head_size(self) -> int:
    """Return the ``head_size`` value stored on the model arch config."""
    arch_config = self.model_arch_config
    return arch_config.head_size
...
...
vllm/config/model_arch.py
View file @
da1e7311
...
...
@@ -53,5 +53,8 @@ class ModelArchitectureConfig:
is_deepseek_mla
:
bool
"""Whether the model is a DeepSeek MLA model."""
is_mm_prefix_lm
:
bool
"""Whether the model uses image bidirectional attention."""
derived_max_model_len_and_key
:
tuple
[
float
,
str
|
None
]
"""Derived maximum model length and key from the hf config."""
vllm/transformers_utils/model_arch_config_convertor.py
View file @
da1e7311
...
...
@@ -250,6 +250,22 @@ class ModelArchConfigConvertorBase:
)
return
False
def is_mm_prefix_lm(self) -> bool:
    """Whether to use bidirectional attention for mm positions.

    Returns:
        ``bool(hf_config.is_mm_prefix_lm)`` when the HF config defines that
        attribute; otherwise ``True`` only if the config has a
        ``model_type`` that appears in the built-in list of known models.
    """
    config = self.hf_config

    # An explicit flag on the HF config takes precedence over the list,
    # even when it evaluates to False.
    if hasattr(config, "is_mm_prefix_lm"):
        return bool(config.is_mm_prefix_lm)

    # Fallback: model types known to need this behaviour.
    known_model_types = ("bagel", "gemma3", "molmo2", "paligemma", "umm")
    return hasattr(config, "model_type") and config.model_type in known_model_types
def
derive_max_model_len_and_key
(
self
)
->
tuple
[
float
,
str
|
None
]:
derived_max_model_len
=
float
(
"inf"
)
possible_keys
=
[
...
...
@@ -299,6 +315,7 @@ class ModelArchConfigConvertorBase:
num_experts
=
self
.
get_num_experts
(),
quantization_config
=
self
.
get_quantization_config
(),
is_deepseek_mla
=
self
.
is_deepseek_mla
(),
is_mm_prefix_lm
=
self
.
is_mm_prefix_lm
(),
derived_max_model_len_and_key
=
self
.
derive_max_model_len_and_key
(),
)
...
...
@@ -451,6 +468,12 @@ class LongCatFlashMTPModelArchConfigConvertor(ModelArchConfigConvertorBase):
class
Gemma4ModelArchConfigConvertor
(
ModelArchConfigConvertorBase
):
def is_mm_prefix_lm(self) -> bool:
    """Report whether the text config requests ``"vision"`` bidirectional attention.

    Reads ``use_bidirectional_attention`` from ``hf_text_config``; a missing
    attribute is treated as ``None`` and therefore yields ``False``.
    """
    attention_mode = getattr(
        self.hf_text_config, "use_bidirectional_attention", None
    )
    return attention_mode == "vision"
def
get_head_size
(
self
)
->
int
:
# Gemma4 uses dual head dimensions: head_dim (sliding attention)
# and global_head_dim (full attention). Return the largest so
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment