Unverified Commit 19a9b169 authored by Xiong Wang, committed by GitHub
Browse files

Add Qwen3-Omni moe thinker (#25550)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Roger Wang <hey@rogerw.io>
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: Xiong Wang <feizi.wx@alibaba-inc.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: Roger Wang <hey@rogerw.io>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
parent 96ad65b7
...@@ -714,6 +714,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen ...@@ -714,6 +714,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
| `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `Qwen/Qwen2.5-Omni-3B`, `Qwen/Qwen2.5-Omni-7B` | ✅︎ | ✅︎ | ✅︎ | | `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `Qwen/Qwen2.5-Omni-3B`, `Qwen/Qwen2.5-Omni-7B` | ✅︎ | ✅︎ | ✅︎ |
| `Qwen3VLForConditionalGeneration` | Qwen3-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3-VL-4B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Qwen3VLForConditionalGeneration` | Qwen3-VL | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3-VL-4B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ |
| `Qwen3VLMoeForConditionalGeneration` | Qwen3-VL-MOE | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3-VL-30B-A3B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Qwen3VLMoeForConditionalGeneration` | Qwen3-VL-MOE | T + I<sup>E+</sup> + V<sup>E+</sup> | `Qwen/Qwen3-VL-30B-A3B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ |
| `Qwen3OmniMoeThinkerForConditionalGeneration` | Qwen3-Omni | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup> | `Qwen/Qwen3-Omni-30B-A3B-Instruct`, `Qwen/Qwen3-Omni-30B-A3B-Thinking` | ✅︎ | ✅︎ | ✅︎ |
| `RForConditionalGeneration` | R-VL-4B | T + I<sup>E+</sup> | `YannQi/R-4B` | | ✅︎ | ✅︎ | | `RForConditionalGeneration` | R-VL-4B | T + I<sup>E+</sup> | `YannQi/R-4B` | | ✅︎ | ✅︎ |
| `SkyworkR1VChatModel` | Skywork-R1V-38B | T + I | `Skywork/Skywork-R1V-38B` | | ✅︎ | ✅︎ | | `SkyworkR1VChatModel` | Skywork-R1V-38B | T + I | `Skywork/Skywork-R1V-38B` | | ✅︎ | ✅︎ |
| `SmolVLMForConditionalGeneration` | SmolVLM2 | T + I | `SmolVLM2-2.2B-Instruct` | ✅︎ | | ✅︎ | | `SmolVLMForConditionalGeneration` | SmolVLM2 | T + I | `SmolVLM2-2.2B-Instruct` | ✅︎ | | ✅︎ |
...@@ -804,8 +805,7 @@ Some models are supported only via the [Transformers backend](#transformers). Th ...@@ -804,8 +805,7 @@ Some models are supported only via the [Transformers backend](#transformers). Th
Our PaliGemma implementations have the same problem as Gemma 3 (see above) for both V0 and V1. Our PaliGemma implementations have the same problem as Gemma 3 (see above) for both V0 and V1.
!!! note !!! note
For Qwen2.5-Omni, reading audio from video pre-processing (`--mm-processor-kwargs '{"use_audio_in_video": true}'`) For Qwen2.5-Omni and Qwen3-Omni, reading audio from video pre-processing (`--mm-processor-kwargs '{"use_audio_in_video": true}'`) is currently work in progress and not yet supported.
is currently supported on V0 (but not V1), because overlapping modalities is not yet supported in V1.
#### Transcription #### Transcription
......
...@@ -384,6 +384,7 @@ def _test_processing_correctness_one( ...@@ -384,6 +384,7 @@ def _test_processing_correctness_one(
"Qwen/Qwen2.5-Omni-3B", "Qwen/Qwen2.5-Omni-3B",
"Qwen/Qwen3-VL-4B-Instruct", "Qwen/Qwen3-VL-4B-Instruct",
"Qwen/Qwen3-VL-30B-A3B-Instruct", "Qwen/Qwen3-VL-30B-A3B-Instruct",
"Qwen/Qwen3-Omni-30B-A3B-Instruct",
"YannQi/R-4B", "YannQi/R-4B",
"Skywork/Skywork-R1V-38B", "Skywork/Skywork-R1V-38B",
"HuggingFaceTB/SmolVLM2-2.2B-Instruct", "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
......
...@@ -773,6 +773,11 @@ _MULTIMODAL_EXAMPLE_MODELS = { ...@@ -773,6 +773,11 @@ _MULTIMODAL_EXAMPLE_MODELS = {
min_transformers_version="4.57", min_transformers_version="4.57",
is_available_online=False, is_available_online=False,
), ),
"Qwen3OmniMoeForConditionalGeneration": _HfExamplesInfo(
"Qwen/Qwen3-Omni-30B-A3B-Instruct",
max_model_len=4096,
min_transformers_version="4.57",
),
"RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True), "RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True),
"SkyworkR1VChatModel": _HfExamplesInfo( "SkyworkR1VChatModel": _HfExamplesInfo(
"Skywork/Skywork-R1V-38B", trust_remote_code=True "Skywork/Skywork-R1V-38B", trust_remote_code=True
......
This diff is collapsed.
...@@ -355,6 +355,10 @@ _MULTIMODAL_MODELS = { ...@@ -355,6 +355,10 @@ _MULTIMODAL_MODELS = {
"qwen2_5_omni_thinker", "qwen2_5_omni_thinker",
"Qwen2_5OmniThinkerForConditionalGeneration", "Qwen2_5OmniThinkerForConditionalGeneration",
), ),
"Qwen3OmniMoeForConditionalGeneration": (
"qwen3_omni_moe_thinker",
"Qwen3OmniMoeThinkerForConditionalGeneration",
),
"Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"), # noqa: E501 "Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"), # noqa: E501
"Qwen3VLMoeForConditionalGeneration": ( "Qwen3VLMoeForConditionalGeneration": (
"qwen3_vl_moe", "qwen3_vl_moe",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment