Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e57ef99b
Unverified
Commit
e57ef99b
authored
Feb 04, 2026
by
Cyrus Leung
Committed by
GitHub
Feb 04, 2026
Browse files
[Model] Apply #32631 for recent models (#33785)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
f8516a1a
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
46 additions
and
50 deletions
+46
-50
vllm/model_executor/models/eagle2_5_vl.py
vllm/model_executor/models/eagle2_5_vl.py
+16
-17
vllm/model_executor/models/funaudiochat.py
vllm/model_executor/models/funaudiochat.py
+0
-3
vllm/model_executor/models/openpangu_vl.py
vllm/model_executor/models/openpangu_vl.py
+17
-16
vllm/model_executor/models/qwen3_asr.py
vllm/model_executor/models/qwen3_asr.py
+13
-14
No files found.
vllm/model_executor/models/eagle2_5_vl.py
View file @
e57ef99b
...
...
@@ -222,6 +222,7 @@ class Eagle2_5_VLForConditionalGeneration(
self
.
select_layer
=
getattr
(
config
,
"select_layer"
,
-
1
)
with
self
.
_mark_tower_model
(
vllm_config
,
"image"
):
# Vision encoder (SigLIP)
self
.
vision_model
=
self
.
_init_vision_model
(
config
,
...
...
@@ -229,6 +230,10 @@ class Eagle2_5_VLForConditionalGeneration(
prefix
=
maybe_prefix
(
prefix
,
"vision_model"
),
)
# MLP projection
self
.
mlp1
=
self
.
_init_mlp1
(
config
)
with
self
.
_mark_language_model
(
vllm_config
):
# Language model (Qwen2)
self
.
language_model
=
init_vllm_registered_model
(
vllm_config
=
vllm_config
,
...
...
@@ -236,9 +241,6 @@ class Eagle2_5_VLForConditionalGeneration(
prefix
=
maybe_prefix
(
prefix
,
"language_model"
),
)
# MLP projection
self
.
mlp1
=
self
.
_init_mlp1
(
config
)
self
.
img_context_token_id
=
None
self
.
make_empty_intermediate_tensors
=
(
...
...
@@ -399,9 +401,6 @@ class Eagle2_5_VLForConditionalGeneration(
]
return
image_embeds
.
split
(
image_feature_sizes
)
def
get_language_model
(
self
)
->
torch
.
nn
.
Module
:
return
self
.
language_model
def
embed_multimodal
(
self
,
**
kwargs
:
object
)
->
MultiModalEmbeddings
:
"""Embed multimodal inputs."""
image_input
=
self
.
_parse_and_validate_image_input
(
**
kwargs
)
...
...
vllm/model_executor/models/funaudiochat.py
View file @
e57ef99b
...
...
@@ -820,9 +820,6 @@ class FunAudioChatForConditionalGeneration(nn.Module, SupportsMultiModal, Suppor
self
.
language_model
.
make_empty_intermediate_tensors
)
def
get_language_model
(
self
)
->
torch
.
nn
.
Module
:
return
self
.
language_model
def
_get_continuous_audio_features
(
self
,
input_features
:
torch
.
Tensor
,
...
...
vllm/model_executor/models/openpangu_vl.py
View file @
e57ef99b
...
...
@@ -843,6 +843,8 @@ class OpenPanguVLForConditionalGeneration(
self
.
config
=
config
self
.
vllm_config
=
vllm_config
quant_config
=
vllm_config
.
quant_config
with
self
.
_mark_tower_model
(
vllm_config
,
{
"image"
,
"video"
}):
self
.
visual
=
OpenPanguVisionTransformer
(
vision_config
=
config
.
vision_config
,
out_hidden_size
=
config
.
vision_config
.
out_hidden_size
,
...
...
@@ -852,11 +854,13 @@ class OpenPanguVLForConditionalGeneration(
prefix
=
maybe_prefix
(
prefix
,
"visual"
),
)
with
self
.
_mark_language_model
(
vllm_config
):
self
.
language_model
=
init_vllm_registered_model
(
vllm_config
=
vllm_config
,
prefix
=
maybe_prefix
(
"openpangu"
,
"language_model"
),
architectures
=
[
"PanguEmbeddedForCausalLM"
],
)
self
.
make_empty_intermediate_tensors
=
(
self
.
language_model
.
make_empty_intermediate_tensors
)
...
...
@@ -1008,9 +1012,6 @@ class OpenPanguVLForConditionalGeneration(
)
return
mm_input_by_modality
def
get_language_model
(
self
)
->
torch
.
nn
.
Module
:
return
self
.
language_model
def
embed_multimodal
(
self
,
**
kwargs
:
object
)
->
MultiModalEmbeddings
|
None
:
mm_input_by_modality
=
self
.
_parse_and_validate_multimodal_inputs
(
**
kwargs
)
if
not
mm_input_by_modality
:
...
...
vllm/model_executor/models/qwen3_asr.py
View file @
e57ef99b
...
...
@@ -296,13 +296,15 @@ class Qwen3ASRForConditionalGeneration(
multimodal_config
=
vllm_config
.
model_config
.
multimodal_config
self
.
config
=
thinker_config
self
.
multimodal_config
=
multimodal_config
self
.
quant_config
=
quant_config
with
self
.
_mark_tower_model
(
vllm_config
,
"audio"
):
self
.
audio_tower
=
Qwen3OmniMoeAudioEncoder
(
thinker_config
.
audio_config
,
prefix
=
maybe_prefix
(
prefix
,
"audio_tower"
),
)
self
.
quant_config
=
quant_config
with
self
.
_mark_language_model
(
vllm_config
):
self
.
language_model
=
Qwen3ForCausalLM
(
vllm_config
=
vllm_config
.
with_hf_config
(
thinker_config
.
text_config
,
architectures
=
[
"Qwen3ForCausalLM"
]
...
...
@@ -363,9 +365,6 @@ class Qwen3ASRForConditionalGeneration(
)
return
audio_features
.
split
(
audio_output_lengths
.
tolist
())
def
get_language_model
(
self
)
->
torch
.
nn
.
Module
:
return
self
.
language_model
def
embed_multimodal
(
self
,
**
kwargs
:
object
)
->
MultiModalEmbeddings
|
None
:
mm_input_by_modality
=
self
.
_parse_and_validate_multimodal_inputs
(
**
kwargs
)
if
not
mm_input_by_modality
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment