Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
004203e9
Unverified
Commit
004203e9
authored
Jul 31, 2025
by
Cyrus Leung
Committed by
GitHub
Jul 30, 2025
Browse files
[CI/Build] Fix registry tests (#21934)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
5c765aec
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
70 additions
and
19 deletions
+70
-19
tests/models/registry.py
tests/models/registry.py
+11
-5
vllm/model_executor/models/mpt.py
vllm/model_executor/models/mpt.py
+10
-10
vllm/model_executor/models/telechat2.py
vllm/model_executor/models/telechat2.py
+13
-2
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+3
-2
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+2
-0
vllm/transformers_utils/configs/nvlm_d.py
vllm/transformers_utils/configs/nvlm_d.py
+31
-0
No files found.
tests/models/registry.py
View file @
004203e9
...
@@ -170,8 +170,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -170,8 +170,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
min_transformers_version
=
"4.54"
),
min_transformers_version
=
"4.54"
),
"Ernie4_5_MoeForCausalLM"
:
_HfExamplesInfo
(
"baidu/ERNIE-4.5-21B-A3B-PT"
,
"Ernie4_5_MoeForCausalLM"
:
_HfExamplesInfo
(
"baidu/ERNIE-4.5-21B-A3B-PT"
,
min_transformers_version
=
"4.54"
),
min_transformers_version
=
"4.54"
),
"ExaoneForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
),
# noqa: E501
"ExaoneForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
,
"Exaone4ForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-4.0-32B"
),
# noqa: E501
trust_remote_code
=
True
),
"Exaone4ForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-4.0-32B"
,
min_transformers_version
=
"4.54"
),
"Fairseq2LlamaForCausalLM"
:
_HfExamplesInfo
(
"mgleize/fairseq2-dummy-Llama-3.2-1B"
),
# noqa: E501
"Fairseq2LlamaForCausalLM"
:
_HfExamplesInfo
(
"mgleize/fairseq2-dummy-Llama-3.2-1B"
),
# noqa: E501
"FalconForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-7b"
),
"FalconForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-7b"
),
"FalconH1ForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/Falcon-H1-0.5B-Base"
,
"FalconH1ForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/Falcon-H1-0.5B-Base"
,
...
@@ -199,8 +201,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -199,8 +201,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
trust_remote_code
=
True
),
trust_remote_code
=
True
),
"HunYuanMoEV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-A13B-Instruct"
,
"HunYuanMoEV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-A13B-Instruct"
,
trust_remote_code
=
True
),
trust_remote_code
=
True
),
# TODO: Remove is_available_online once their config.json is fixed
"HunYuanDenseV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-7B-Instruct-0124"
,
"HunYuanDenseV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-7B-Instruct-0124"
,
trust_remote_code
=
True
),
trust_remote_code
=
True
,
is_available_online
=
False
),
"HCXVisionForCausalLM"
:
_HfExamplesInfo
(
"HCXVisionForCausalLM"
:
_HfExamplesInfo
(
"naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
,
"naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
,
trust_remote_code
=
True
),
trust_remote_code
=
True
),
...
@@ -275,7 +279,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
...
@@ -275,7 +279,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
),
# noqa: E501
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
),
# noqa: E501
"StableLmForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-3b-4e1t"
),
"StableLmForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-3b-4e1t"
),
"Starcoder2ForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder2-3b"
),
"Starcoder2ForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder2-3b"
),
"SolarForCausalLM"
:
_HfExamplesInfo
(
"upstage/solar-pro-preview-instruct"
),
"SolarForCausalLM"
:
_HfExamplesInfo
(
"upstage/solar-pro-preview-instruct"
,
trust_remote_code
=
True
),
"TeleChat2ForCausalLM"
:
_HfExamplesInfo
(
"Tele-AI/TeleChat2-3B"
,
"TeleChat2ForCausalLM"
:
_HfExamplesInfo
(
"Tele-AI/TeleChat2-3B"
,
trust_remote_code
=
True
),
trust_remote_code
=
True
),
"TeleFLMForCausalLM"
:
_HfExamplesInfo
(
"CofeAI/FLM-2-52B-Instruct-2407"
,
"TeleFLMForCausalLM"
:
_HfExamplesInfo
(
"CofeAI/FLM-2-52B-Instruct-2407"
,
...
@@ -449,7 +454,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
...
@@ -449,7 +454,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
max_model_len
=
4096
),
max_model_len
=
4096
),
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-3B"
),
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-3B"
),
"Qwen2_5OmniForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-7B-AWQ"
),
# noqa: E501
"Qwen2_5OmniForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-7B-AWQ"
),
# noqa: E501
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
"Skywork/Skywork-R1V-38B"
),
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
"Skywork/Skywork-R1V-38B"
,
trust_remote_code
=
True
),
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
),
# noqa: E501
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
),
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
# noqa: E501
trust_remote_code
=
True
),
trust_remote_code
=
True
),
...
...
vllm/model_executor/models/mpt.py
View file @
004203e9
...
@@ -8,7 +8,7 @@ from typing import Optional, Union
...
@@ -8,7 +8,7 @@ from typing import Optional, Union
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Mpt
Config
from
vllm.attention
import
Attention
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
...
@@ -50,7 +50,7 @@ class MPTAttention(nn.Module):
...
@@ -50,7 +50,7 @@ class MPTAttention(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Mpt
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
@@ -59,15 +59,15 @@ class MPTAttention(nn.Module):
...
@@ -59,15 +59,15 @@ class MPTAttention(nn.Module):
self
.
d_model
=
config
.
d_model
self
.
d_model
=
config
.
d_model
self
.
total_num_heads
=
config
.
n_heads
self
.
total_num_heads
=
config
.
n_heads
self
.
head_dim
=
self
.
d_model
//
self
.
total_num_heads
self
.
head_dim
=
self
.
d_model
//
self
.
total_num_heads
self
.
clip_qkv
=
config
.
attn_config
[
"
clip_qkv
"
]
self
.
clip_qkv
=
config
.
attn_config
.
clip_qkv
self
.
qk_ln
=
config
.
attn_config
[
"
qk_ln
"
]
self
.
qk_ln
=
config
.
attn_config
.
qk_ln
self
.
alibi_bias_max
=
config
.
attn_config
[
"
alibi_bias_max
"
]
self
.
alibi_bias_max
=
config
.
attn_config
.
alibi_bias_max
if
"kv_n_heads"
in
config
.
attn_config
:
if
"kv_n_heads"
in
config
.
attn_config
:
self
.
total_num_kv_heads
=
config
.
attn_config
[
'
kv_n_heads
'
]
self
.
total_num_kv_heads
=
config
.
attn_config
.
kv_n_heads
else
:
else
:
self
.
total_num_kv_heads
=
self
.
total_num_heads
self
.
total_num_kv_heads
=
self
.
total_num_heads
assert
not
config
.
attn_config
[
"
prefix_lm
"
]
assert
not
config
.
attn_config
.
prefix_lm
assert
config
.
attn_config
[
"
alibi
"
]
assert
config
.
attn_config
.
alibi
# pylint: disable=invalid-name
# pylint: disable=invalid-name
self
.
Wqkv
=
QKVParallelLinear
(
self
.
Wqkv
=
QKVParallelLinear
(
...
@@ -144,7 +144,7 @@ class MPTMLP(nn.Module):
...
@@ -144,7 +144,7 @@ class MPTMLP(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Mpt
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
):
):
super
().
__init__
()
super
().
__init__
()
...
@@ -176,7 +176,7 @@ class MPTBlock(nn.Module):
...
@@ -176,7 +176,7 @@ class MPTBlock(nn.Module):
def
__init__
(
def
__init__
(
self
,
self
,
config
:
Pretrained
Config
,
config
:
Mpt
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/telechat2.py
View file @
004203e9
...
@@ -37,9 +37,20 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
...
@@ -37,9 +37,20 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
class
TeleChat2Model
(
LlamaModel
):
class
TeleChat2Model
(
LlamaModel
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
hf_config
=
vllm_config
.
model_config
.
hf_config
vllm_config
.
model_config
.
hf_config
.
attribute_map
=
{
"num_hidden_layers"
:
"n_layer"
,
"num_attention_heads"
:
"n_head"
,
"intermediate_size"
:
"ffn_hidden_size"
,
"rms_norm_eps"
:
"layer_norm_epsilon"
}
vllm_config
.
model_config
.
hf_config
.
hidden_act
=
"silu"
# 1. Initialize the LlamaModel with bias
# 1. Initialize the LlamaModel with bias
vllm_config
.
model_config
.
hf_config
.
bias
=
True
hf_config
.
bias
=
True
vllm_config
.
model_config
.
hf_config
.
mlp_bias
=
True
hf_config
.
mlp_bias
=
True
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
# 2. Remove the bias from the qkv_proj and gate_up_proj based on config
# 2. Remove the bias from the qkv_proj and gate_up_proj based on config
# Telechat2's gate_up_proj and qkv_proj don't have bias
# Telechat2's gate_up_proj and qkv_proj don't have bias
...
...
vllm/transformers_utils/config.py
View file @
004203e9
...
@@ -34,8 +34,8 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DeepseekVLV2Config,
...
@@ -34,8 +34,8 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DeepseekVLV2Config,
KimiVLConfig
,
MedusaConfig
,
KimiVLConfig
,
MedusaConfig
,
MllamaConfig
,
MLPSpeculatorConfig
,
MllamaConfig
,
MLPSpeculatorConfig
,
Nemotron_Nano_VL_Config
,
Nemotron_Nano_VL_Config
,
NemotronConfig
,
RW
Config
,
NemotronConfig
,
NVLM_D_
Config
,
UltravoxConfig
)
RWConfig
,
UltravoxConfig
)
# yapf: enable
# yapf: enable
from
vllm.transformers_utils.configs.mistral
import
adapt_config_dict
from
vllm.transformers_utils.configs.mistral
import
adapt_config_dict
from
vllm.transformers_utils.utils
import
check_gguf_file
from
vllm.transformers_utils.utils
import
check_gguf_file
...
@@ -81,6 +81,7 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
...
@@ -81,6 +81,7 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
"medusa"
:
MedusaConfig
,
"medusa"
:
MedusaConfig
,
"eagle"
:
EAGLEConfig
,
"eagle"
:
EAGLEConfig
,
"nemotron"
:
NemotronConfig
,
"nemotron"
:
NemotronConfig
,
"NVLM_D"
:
NVLM_D_Config
,
"ultravox"
:
UltravoxConfig
,
"ultravox"
:
UltravoxConfig
,
**
_CONFIG_REGISTRY_OVERRIDE_HF
**
_CONFIG_REGISTRY_OVERRIDE_HF
}
}
...
...
vllm/transformers_utils/configs/__init__.py
View file @
004203e9
...
@@ -23,6 +23,7 @@ from vllm.transformers_utils.configs.moonvit import MoonViTConfig
...
@@ -23,6 +23,7 @@ from vllm.transformers_utils.configs.moonvit import MoonViTConfig
from
vllm.transformers_utils.configs.nemotron
import
NemotronConfig
from
vllm.transformers_utils.configs.nemotron
import
NemotronConfig
from
vllm.transformers_utils.configs.nemotron_h
import
NemotronHConfig
from
vllm.transformers_utils.configs.nemotron_h
import
NemotronHConfig
from
vllm.transformers_utils.configs.nemotron_vl
import
Nemotron_Nano_VL_Config
from
vllm.transformers_utils.configs.nemotron_vl
import
Nemotron_Nano_VL_Config
from
vllm.transformers_utils.configs.nvlm_d
import
NVLM_D_Config
from
vllm.transformers_utils.configs.ultravox
import
UltravoxConfig
from
vllm.transformers_utils.configs.ultravox
import
UltravoxConfig
__all__
=
[
__all__
=
[
...
@@ -39,5 +40,6 @@ __all__ = [
...
@@ -39,5 +40,6 @@ __all__ = [
"NemotronConfig"
,
"NemotronConfig"
,
"NemotronHConfig"
,
"NemotronHConfig"
,
"Nemotron_Nano_VL_Config"
,
"Nemotron_Nano_VL_Config"
,
"NVLM_D_Config"
,
"UltravoxConfig"
,
"UltravoxConfig"
,
]
]
vllm/transformers_utils/configs/nvlm_d.py
0 → 100644
View file @
004203e9
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Adapted from
# https://huggingface.co/nvidia/NVLM-D-72B/blob/main/configuration_nvlm_d.py
# --------------------------------------------------------
# NVLM-D
# Copyright (c) 2024 NVIDIA
# Licensed under Apache 2.0 License [see LICENSE for details]
# --------------------------------------------------------
from
transformers
import
Qwen2Config
from
transformers.configuration_utils
import
PretrainedConfig
class NVLM_D_Config(PretrainedConfig):
    """Composite configuration holding a vision and a text sub-config.

    The vision part is stored as a generic ``PretrainedConfig`` and the
    language-model part as a ``Qwen2Config``; both are built from the
    keyword mappings handed to the constructor.
    """

    model_type = 'NVLM_D'
    is_composition = True

    def __init__(self, vision_config=None, llm_config=None, **kwargs):
        """Build the composite config.

        Args:
            vision_config: Mapping of keyword arguments unpacked into
                ``PretrainedConfig``; ``None`` is treated as an empty
                mapping.
            llm_config: Mapping of keyword arguments unpacked into
                ``Qwen2Config``; ``None`` is treated as an empty mapping.
            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)

        # Fall back to empty mappings so the sub-configs are constructed
        # with their own defaults when nothing is supplied.
        vision_kwargs = {} if vision_config is None else vision_config
        llm_kwargs = {} if llm_config is None else llm_config

        # Note: the language-model settings arrive as ``llm_config`` but are
        # exposed on the instance as ``text_config``.
        self.vision_config = PretrainedConfig(**vision_kwargs)
        self.text_config = Qwen2Config(**llm_kwargs)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment