Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5885e330
Unverified
Commit
5885e330
authored
Feb 13, 2026
by
Roger Wang
Committed by
GitHub
Feb 13, 2026
Browse files
[Misc] Port Qwen3.5 Configs (#34512)
Signed-off-by:
Roger Wang
<
hey@rogerw.io
>
parent
071d863e
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
410 additions
and
12 deletions
+410
-12
vllm/model_executor/models/qwen3_5.py
vllm/model_executor/models/qwen3_5.py
+8
-8
vllm/model_executor/models/qwen3_5_mtp.py
vllm/model_executor/models/qwen3_5_mtp.py
+2
-4
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+2
-0
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+8
-0
vllm/transformers_utils/configs/qwen3_5.py
vllm/transformers_utils/configs/qwen3_5.py
+189
-0
vllm/transformers_utils/configs/qwen3_5_moe.py
vllm/transformers_utils/configs/qwen3_5_moe.py
+201
-0
No files found.
vllm/model_executor/models/qwen3_5.py
View file @
5885e330
...
...
@@ -31,14 +31,6 @@ import torch
from
einops
import
rearrange
from
torch
import
nn
from
transformers.activations
import
ACT2FN
from
transformers.models.qwen3_5.configuration_qwen3_5
import
(
Qwen3_5Config
,
Qwen3_5TextConfig
,
)
from
transformers.models.qwen3_5_moe.configuration_qwen3_5_moe
import
(
Qwen3_5MoeConfig
,
Qwen3_5MoeTextConfig
,
)
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
(
...
...
@@ -87,6 +79,14 @@ from vllm.model_executor.utils import set_weight_attrs
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.platforms
import
current_platform
from
vllm.sequence
import
IntermediateTensors
from
vllm.transformers_utils.configs.qwen3_5
import
(
Qwen3_5Config
,
Qwen3_5TextConfig
,
)
from
vllm.transformers_utils.configs.qwen3_5_moe
import
(
Qwen3_5MoeConfig
,
Qwen3_5MoeTextConfig
,
)
from
.interfaces
import
(
HasInnerState
,
...
...
vllm/model_executor/models/qwen3_5_mtp.py
View file @
5885e330
...
...
@@ -7,10 +7,6 @@ from collections.abc import Callable, Iterable
import
torch
from
torch
import
nn
from
transformers.models.qwen3_5.configuration_qwen3_5
import
Qwen3_5TextConfig
from
transformers.models.qwen3_5_moe.configuration_qwen3_5_moe
import
(
Qwen3_5MoeTextConfig
,
)
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
VllmConfig
...
...
@@ -27,6 +23,8 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from
vllm.model_executor.models.qwen3_5
import
Qwen3_5DecoderLayer
,
Qwen3_5RMSNorm
from
vllm.model_executor.models.qwen3_next
import
QwenNextMixtureOfExperts
from
vllm.sequence
import
IntermediateTensors
from
vllm.transformers_utils.configs.qwen3_5
import
Qwen3_5TextConfig
from
vllm.transformers_utils.configs.qwen3_5_moe
import
Qwen3_5MoeTextConfig
from
.interfaces
import
(
MultiModalEmbeddings
,
...
...
vllm/transformers_utils/config.py
View file @
5885e330
...
...
@@ -100,6 +100,8 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = LazyConfigDict(
step3p5
=
"Step3p5Config"
,
qwen3_asr
=
"Qwen3ASRConfig"
,
qwen3_next
=
"Qwen3NextConfig"
,
qwen3_5
=
"Qwen3_5Config"
,
qwen3_5_moe
=
"Qwen3_5MoeConfig"
,
lfm2_moe
=
"Lfm2MoeConfig"
,
tarsier2
=
"Tarsier2Config"
,
)
...
...
vllm/transformers_utils/configs/__init__.py
View file @
5885e330
...
...
@@ -55,6 +55,10 @@ _CLASS_TO_MODULE: dict[str, str] = {
"Step3p5Config"
:
"vllm.transformers_utils.configs.step3p5"
,
"Qwen3ASRConfig"
:
"vllm.transformers_utils.configs.qwen3_asr"
,
"Qwen3NextConfig"
:
"vllm.transformers_utils.configs.qwen3_next"
,
"Qwen3_5Config"
:
"vllm.transformers_utils.configs.qwen3_5"
,
"Qwen3_5TextConfig"
:
"vllm.transformers_utils.configs.qwen3_5"
,
"Qwen3_5MoeConfig"
:
"vllm.transformers_utils.configs.qwen3_5_moe"
,
"Qwen3_5MoeTextConfig"
:
"vllm.transformers_utils.configs.qwen3_5_moe"
,
"Tarsier2Config"
:
"vllm.transformers_utils.configs.tarsier2"
,
# Special case: DeepseekV3Config is from HuggingFace Transformers
"DeepseekV3Config"
:
"transformers"
,
...
...
@@ -99,6 +103,10 @@ __all__ = [
"Step3p5Config"
,
"Qwen3ASRConfig"
,
"Qwen3NextConfig"
,
"Qwen3_5Config"
,
"Qwen3_5TextConfig"
,
"Qwen3_5MoeConfig"
,
"Qwen3_5MoeTextConfig"
,
"Tarsier2Config"
,
]
...
...
vllm/transformers_utils/configs/qwen3_5.py
0 → 100644
View file @
5885e330
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Copyright 2025 The Qwen Team and The HuggingFace Inc. team.
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Qwen3.5 model configuration"""
from
transformers.configuration_utils
import
PretrainedConfig
,
layer_type_validation
class Qwen3_5TextConfig(PretrainedConfig):
    """Configuration of the Qwen3.5 text backbone.

    Every named constructor argument is stored on the instance under the
    same name. When ``layer_types`` is not supplied it is derived from the
    optional ``full_attention_interval`` keyword (default ``4``): every
    ``interval``-th layer (1-based) is ``"full_attention"`` and all other
    layers are ``"linear_attention"``.

    Two entries of ``kwargs`` are adjusted before they are forwarded to
    ``PretrainedConfig.__init__``:

    * ``ignore_keys_at_rope_validation`` is always overwritten with
      ``["mrope_section", "mrope_interleaved"]``.
    * ``partial_rotary_factor`` falls back to ``0.25`` when absent.
    """

    model_type = "qwen3_5_text"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Per-module sharding styles, keyed by module-name pattern.
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    # Module name -> (input names, output names).
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }
    base_config_key = "text_config"

    def __init__(
        self,
        vocab_size=248320,
        hidden_size=4096,
        intermediate_size=12288,
        num_hidden_layers=32,
        num_attention_heads=16,
        num_key_value_heads=4,
        hidden_act="silu",
        max_position_embeddings=32768,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        tie_word_embeddings=False,
        rope_parameters=None,
        attention_bias=False,
        attention_dropout=0.0,
        head_dim=256,
        linear_conv_kernel_dim=4,
        linear_key_head_dim=128,
        linear_value_head_dim=128,
        linear_num_key_heads=16,
        linear_num_value_heads=32,
        layer_types=None,
        pad_token_id=None,
        bos_token_id=None,
        eos_token_id=None,
        **kwargs,
    ):
        # Keyword adjustments consumed later by the base-class constructor.
        kwargs["ignore_keys_at_rope_validation"] = [
            "mrope_section",
            "mrope_interleaved",
        ]
        kwargs.setdefault("partial_rotary_factor", 0.25)

        # Special tokens and embedding tying.
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.tie_word_embeddings = tie_word_embeddings

        # Core transformer dimensions.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.head_dim = head_dim
        self.rope_parameters = rope_parameters

        # Derive the per-layer attention kinds when none were given.
        if layer_types is None:
            full_attn_every = kwargs.get("full_attention_interval", 4)
            layer_types = []
            for layer_idx in range(self.num_hidden_layers):
                remainder = (layer_idx + 1) % full_attn_every
                layer_types.append(
                    "linear_attention" if bool(remainder) else "full_attention"
                )
        self.layer_types = layer_types
        layer_type_validation(self.layer_types, self.num_hidden_layers)

        # Linear-attention settings.
        self.linear_conv_kernel_dim = linear_conv_kernel_dim
        self.linear_key_head_dim = linear_key_head_dim
        self.linear_value_head_dim = linear_value_head_dim
        self.linear_num_key_heads = linear_num_key_heads
        self.linear_num_value_heads = linear_num_value_heads

        super().__init__(**kwargs)
class Qwen3_5VisionConfig(PretrainedConfig):
    """Configuration of the Qwen3.5 vision tower.

    The base-class constructor runs first with the extra ``kwargs``; every
    named argument is then stored on the instance under its own name.
    """

    model_type = "qwen3_5"
    base_config_key = "vision_config"

    def __init__(
        self,
        depth=27,
        hidden_size=1152,
        hidden_act="gelu_pytorch_tanh",
        intermediate_size=4304,
        num_heads=16,
        in_channels=3,
        patch_size=16,
        spatial_merge_size=2,
        temporal_patch_size=2,
        out_hidden_size=3584,
        num_position_embeddings=2304,
        initializer_range=0.02,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Assigned after the base constructor, in declaration order.
        vision_fields = (
            ("depth", depth),
            ("hidden_size", hidden_size),
            ("hidden_act", hidden_act),
            ("intermediate_size", intermediate_size),
            ("num_heads", num_heads),
            ("in_channels", in_channels),
            ("patch_size", patch_size),
            ("spatial_merge_size", spatial_merge_size),
            ("temporal_patch_size", temporal_patch_size),
            ("out_hidden_size", out_hidden_size),
            ("num_position_embeddings", num_position_embeddings),
            ("initializer_range", initializer_range),
        )
        for field_name, field_value in vision_fields:
            setattr(self, field_name, field_value)
class Qwen3_5Config(PretrainedConfig):
    """Top-level Qwen3.5 configuration combining text and vision configs.

    Args:
        text_config: ``dict`` of keyword arguments for ``Qwen3_5TextConfig``,
            an already-built config object, or ``None`` for the defaults.
        vision_config: ``dict`` of keyword arguments for
            ``Qwen3_5VisionConfig``, an already-built config object, or
            ``None`` for the defaults.
        image_token_id: Stored as ``self.image_token_id``.
        video_token_id: Stored as ``self.video_token_id``.
        vision_start_token_id: Stored as ``self.vision_start_token_id``.
        vision_end_token_id: Stored as ``self.vision_end_token_id``.
        tie_word_embeddings: Stored as ``self.tie_word_embeddings``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "qwen3_5"
    sub_configs = {
        "vision_config": Qwen3_5VisionConfig,
        "text_config": Qwen3_5TextConfig,
    }
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        text_config=None,
        vision_config=None,
        image_token_id=248056,
        video_token_id=248057,
        vision_start_token_id=248053,
        vision_end_token_id=248054,
        tie_word_embeddings=False,
        **kwargs,
    ):
        if isinstance(vision_config, dict):
            self.vision_config = self.sub_configs["vision_config"](**vision_config)
        elif vision_config is None:
            self.vision_config = self.sub_configs["vision_config"]()
        else:
            # An already-constructed config object was passed in. Without
            # this branch the attribute was never set and the caller's
            # config was silently dropped.
            self.vision_config = vision_config

        if isinstance(text_config, dict):
            self.text_config = self.sub_configs["text_config"](**text_config)
        elif text_config is None:
            self.text_config = self.sub_configs["text_config"]()
        else:
            # Same handling as for vision_config above.
            self.text_config = text_config

        self.image_token_id = image_token_id
        self.video_token_id = video_token_id
        self.vision_start_token_id = vision_start_token_id
        self.vision_end_token_id = vision_end_token_id
        self.tie_word_embeddings = tie_word_embeddings
        super().__init__(**kwargs)
# Names exported by `from <module> import *`. Qwen3_5VisionConfig is defined
# in this module but is not listed here.
__all__ = ["Qwen3_5Config", "Qwen3_5TextConfig"]
vllm/transformers_utils/configs/qwen3_5_moe.py
0 → 100644
View file @
5885e330
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Copyright 2025 The Qwen Team and The HuggingFace Inc. team.
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Qwen3.5-MoE model configuration"""
from
transformers.configuration_utils
import
PretrainedConfig
,
layer_type_validation
class Qwen3_5MoeTextConfig(PretrainedConfig):
    """Configuration of the Qwen3.5-MoE text backbone.

    Every named constructor argument is stored on the instance under the
    same name. When ``layer_types`` is not supplied it is derived from the
    optional ``full_attention_interval`` keyword (default ``4``): every
    ``interval``-th layer (1-based) is ``"full_attention"`` and all other
    layers are ``"linear_attention"``.

    Two entries of ``kwargs`` are adjusted before they are forwarded to
    ``PretrainedConfig.__init__``:

    * ``ignore_keys_at_rope_validation`` is always overwritten with
      ``["mrope_section", "mrope_interleaved"]``.
    * ``partial_rotary_factor`` falls back to ``0.25`` when absent.
    """

    model_type = "qwen3_5_moe_text"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Per-module sharding styles, keyed by module-name pattern.
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.experts.gate_up_proj": "packed_colwise",
        "layers.*.mlp.experts.down_proj": "rowwise",
        "layers.*.mlp.shared_expert.gate_proj": "colwise",
        "layers.*.mlp.shared_expert.up_proj": "colwise",
        "layers.*.mlp.shared_expert.down_proj": "rowwise",
    }
    # Module name -> (input names, output names).
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }
    base_config_key = "text_config"

    def __init__(
        self,
        vocab_size=248320,
        hidden_size=2048,
        num_hidden_layers=40,
        num_attention_heads=16,
        num_key_value_heads=2,
        hidden_act="silu",
        max_position_embeddings=32768,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        tie_word_embeddings=False,
        rope_parameters=None,
        attention_bias=False,
        attention_dropout=0.0,
        head_dim=256,
        linear_conv_kernel_dim=4,
        linear_key_head_dim=128,
        linear_value_head_dim=128,
        linear_num_key_heads=16,
        linear_num_value_heads=32,
        moe_intermediate_size=512,
        shared_expert_intermediate_size=512,
        num_experts_per_tok=8,
        num_experts=256,
        output_router_logits=False,
        router_aux_loss_coef=0.001,
        layer_types=None,
        pad_token_id=None,
        bos_token_id=None,
        eos_token_id=None,
        **kwargs,
    ):
        # Keyword adjustments consumed later by the base-class constructor.
        kwargs["ignore_keys_at_rope_validation"] = [
            "mrope_section",
            "mrope_interleaved",
        ]
        kwargs.setdefault("partial_rotary_factor", 0.25)

        # Special tokens and embedding tying.
        self.pad_token_id = pad_token_id
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.tie_word_embeddings = tie_word_embeddings

        # Core transformer dimensions.
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.head_dim = head_dim
        self.rope_parameters = rope_parameters

        # Derive the per-layer attention kinds when none were given.
        if layer_types is None:
            full_attn_every = kwargs.get("full_attention_interval", 4)
            layer_types = []
            for layer_idx in range(self.num_hidden_layers):
                remainder = (layer_idx + 1) % full_attn_every
                layer_types.append(
                    "linear_attention" if bool(remainder) else "full_attention"
                )
        self.layer_types = layer_types
        layer_type_validation(self.layer_types, self.num_hidden_layers)

        # Linear-attention settings.
        self.linear_conv_kernel_dim = linear_conv_kernel_dim
        self.linear_key_head_dim = linear_key_head_dim
        self.linear_value_head_dim = linear_value_head_dim
        self.linear_num_key_heads = linear_num_key_heads
        self.linear_num_value_heads = linear_num_value_heads

        # Mixture-of-experts settings.
        self.moe_intermediate_size = moe_intermediate_size
        self.shared_expert_intermediate_size = shared_expert_intermediate_size
        self.num_experts_per_tok = num_experts_per_tok
        self.num_experts = num_experts
        self.output_router_logits = output_router_logits
        self.router_aux_loss_coef = router_aux_loss_coef

        super().__init__(**kwargs)
class Qwen3_5MoeVisionConfig(PretrainedConfig):
    """Configuration of the Qwen3.5-MoE vision tower.

    The base-class constructor runs first with the extra ``kwargs``; every
    named argument is then stored on the instance under its own name.
    """

    model_type = "qwen3_5_moe"
    base_config_key = "vision_config"

    def __init__(
        self,
        depth=27,
        hidden_size=1152,
        hidden_act="gelu_pytorch_tanh",
        intermediate_size=4304,
        num_heads=16,
        in_channels=3,
        patch_size=16,
        spatial_merge_size=2,
        temporal_patch_size=2,
        out_hidden_size=3584,
        num_position_embeddings=2304,
        initializer_range=0.02,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # Assigned after the base constructor, in declaration order.
        vision_fields = (
            ("depth", depth),
            ("hidden_size", hidden_size),
            ("hidden_act", hidden_act),
            ("intermediate_size", intermediate_size),
            ("num_heads", num_heads),
            ("in_channels", in_channels),
            ("patch_size", patch_size),
            ("spatial_merge_size", spatial_merge_size),
            ("temporal_patch_size", temporal_patch_size),
            ("out_hidden_size", out_hidden_size),
            ("num_position_embeddings", num_position_embeddings),
            ("initializer_range", initializer_range),
        )
        for field_name, field_value in vision_fields:
            setattr(self, field_name, field_value)
class Qwen3_5MoeConfig(PretrainedConfig):
    """Top-level Qwen3.5-MoE configuration combining text and vision configs.

    Args:
        text_config: ``dict`` of keyword arguments for
            ``Qwen3_5MoeTextConfig``, an already-built config object, or
            ``None`` for the defaults.
        vision_config: ``dict`` of keyword arguments for
            ``Qwen3_5MoeVisionConfig``, an already-built config object, or
            ``None`` for the defaults.
        image_token_id: Stored as ``self.image_token_id``.
        video_token_id: Stored as ``self.video_token_id``.
        vision_start_token_id: Stored as ``self.vision_start_token_id``.
        vision_end_token_id: Stored as ``self.vision_end_token_id``.
        tie_word_embeddings: Stored as ``self.tie_word_embeddings``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "qwen3_5_moe"
    sub_configs = {
        "vision_config": Qwen3_5MoeVisionConfig,
        "text_config": Qwen3_5MoeTextConfig,
    }
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        text_config=None,
        vision_config=None,
        image_token_id=248056,
        video_token_id=248057,
        vision_start_token_id=248053,
        vision_end_token_id=248054,
        tie_word_embeddings=False,
        **kwargs,
    ):
        if isinstance(vision_config, dict):
            self.vision_config = self.sub_configs["vision_config"](**vision_config)
        elif vision_config is None:
            self.vision_config = self.sub_configs["vision_config"]()
        else:
            # An already-constructed config object was passed in. Without
            # this branch the attribute was never set and the caller's
            # config was silently dropped.
            self.vision_config = vision_config

        if isinstance(text_config, dict):
            self.text_config = self.sub_configs["text_config"](**text_config)
        elif text_config is None:
            self.text_config = self.sub_configs["text_config"]()
        else:
            # Same handling as for vision_config above.
            self.text_config = text_config

        self.image_token_id = image_token_id
        self.video_token_id = video_token_id
        self.vision_start_token_id = vision_start_token_id
        self.vision_end_token_id = vision_end_token_id
        self.tie_word_embeddings = tie_word_embeddings
        super().__init__(**kwargs)
# Names exported by `from <module> import *`. Qwen3_5MoeVisionConfig is
# defined in this module but is not listed here.
__all__ = ["Qwen3_5MoeConfig", "Qwen3_5MoeTextConfig"]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment