Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
004203e9
Unverified
Commit
004203e9
authored
Jul 31, 2025
by
Cyrus Leung
Committed by
GitHub
Jul 30, 2025
Browse files
[CI/Build] Fix registry tests (#21934)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
5c765aec
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
70 additions
and
19 deletions
+70
-19
tests/models/registry.py
tests/models/registry.py
+11
-5
vllm/model_executor/models/mpt.py
vllm/model_executor/models/mpt.py
+10
-10
vllm/model_executor/models/telechat2.py
vllm/model_executor/models/telechat2.py
+13
-2
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+3
-2
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+2
-0
vllm/transformers_utils/configs/nvlm_d.py
vllm/transformers_utils/configs/nvlm_d.py
+31
-0
No files found.
tests/models/registry.py
View file @
004203e9
...
...
@@ -170,8 +170,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
min_transformers_version
=
"4.54"
),
"Ernie4_5_MoeForCausalLM"
:
_HfExamplesInfo
(
"baidu/ERNIE-4.5-21B-A3B-PT"
,
min_transformers_version
=
"4.54"
),
"ExaoneForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
),
# noqa: E501
"Exaone4ForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-4.0-32B"
),
# noqa: E501
"ExaoneForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
,
trust_remote_code
=
True
),
"Exaone4ForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-4.0-32B"
,
min_transformers_version
=
"4.54"
),
"Fairseq2LlamaForCausalLM"
:
_HfExamplesInfo
(
"mgleize/fairseq2-dummy-Llama-3.2-1B"
),
# noqa: E501
"FalconForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-7b"
),
"FalconH1ForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/Falcon-H1-0.5B-Base"
,
...
...
@@ -199,8 +201,10 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
trust_remote_code
=
True
),
"HunYuanMoEV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-A13B-Instruct"
,
trust_remote_code
=
True
),
# TODO: Remove is_available_online once their config.json is fixed
"HunYuanDenseV1ForCausalLM"
:
_HfExamplesInfo
(
"tencent/Hunyuan-7B-Instruct-0124"
,
trust_remote_code
=
True
),
trust_remote_code
=
True
,
is_available_online
=
False
),
"HCXVisionForCausalLM"
:
_HfExamplesInfo
(
"naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
,
trust_remote_code
=
True
),
...
...
@@ -275,7 +279,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
),
# noqa: E501
"StableLmForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-3b-4e1t"
),
"Starcoder2ForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder2-3b"
),
"SolarForCausalLM"
:
_HfExamplesInfo
(
"upstage/solar-pro-preview-instruct"
),
"SolarForCausalLM"
:
_HfExamplesInfo
(
"upstage/solar-pro-preview-instruct"
,
trust_remote_code
=
True
),
"TeleChat2ForCausalLM"
:
_HfExamplesInfo
(
"Tele-AI/TeleChat2-3B"
,
trust_remote_code
=
True
),
"TeleFLMForCausalLM"
:
_HfExamplesInfo
(
"CofeAI/FLM-2-52B-Instruct-2407"
,
...
...
@@ -449,7 +454,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
max_model_len
=
4096
),
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-3B"
),
"Qwen2_5OmniForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-7B-AWQ"
),
# noqa: E501
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
"Skywork/Skywork-R1V-38B"
),
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
"Skywork/Skywork-R1V-38B"
,
trust_remote_code
=
True
),
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
),
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
# noqa: E501
trust_remote_code
=
True
),
...
...
vllm/model_executor/models/mpt.py
View file @
004203e9
...
...
@@ -8,7 +8,7 @@ from typing import Optional, Union
import
torch
import
torch.nn
as
nn
from
transformers
import
Pretrained
Config
from
transformers
import
Mpt
Config
from
vllm.attention
import
Attention
from
vllm.compilation.decorators
import
support_torch_compile
...
...
@@ -50,7 +50,7 @@ class MPTAttention(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Mpt
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
@@ -59,15 +59,15 @@ class MPTAttention(nn.Module):
self
.
d_model
=
config
.
d_model
self
.
total_num_heads
=
config
.
n_heads
self
.
head_dim
=
self
.
d_model
//
self
.
total_num_heads
self
.
clip_qkv
=
config
.
attn_config
[
"
clip_qkv
"
]
self
.
qk_ln
=
config
.
attn_config
[
"
qk_ln
"
]
self
.
alibi_bias_max
=
config
.
attn_config
[
"
alibi_bias_max
"
]
self
.
clip_qkv
=
config
.
attn_config
.
clip_qkv
self
.
qk_ln
=
config
.
attn_config
.
qk_ln
self
.
alibi_bias_max
=
config
.
attn_config
.
alibi_bias_max
if
"kv_n_heads"
in
config
.
attn_config
:
self
.
total_num_kv_heads
=
config
.
attn_config
[
'
kv_n_heads
'
]
self
.
total_num_kv_heads
=
config
.
attn_config
.
kv_n_heads
else
:
self
.
total_num_kv_heads
=
self
.
total_num_heads
assert
not
config
.
attn_config
[
"
prefix_lm
"
]
assert
config
.
attn_config
[
"
alibi
"
]
assert
not
config
.
attn_config
.
prefix_lm
assert
config
.
attn_config
.
alibi
# pylint: disable=invalid-name
self
.
Wqkv
=
QKVParallelLinear
(
...
...
@@ -144,7 +144,7 @@ class MPTMLP(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Mpt
Config
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
):
super
().
__init__
()
...
...
@@ -176,7 +176,7 @@ class MPTBlock(nn.Module):
def
__init__
(
self
,
config
:
Pretrained
Config
,
config
:
Mpt
Config
,
cache_config
:
Optional
[
CacheConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
prefix
:
str
=
""
,
...
...
vllm/model_executor/models/telechat2.py
View file @
004203e9
...
...
@@ -37,9 +37,20 @@ from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
class
TeleChat2Model
(
LlamaModel
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
hf_config
=
vllm_config
.
model_config
.
hf_config
vllm_config
.
model_config
.
hf_config
.
attribute_map
=
{
"num_hidden_layers"
:
"n_layer"
,
"num_attention_heads"
:
"n_head"
,
"intermediate_size"
:
"ffn_hidden_size"
,
"rms_norm_eps"
:
"layer_norm_epsilon"
}
vllm_config
.
model_config
.
hf_config
.
hidden_act
=
"silu"
# 1. Initialize the LlamaModel with bias
vllm_config
.
model_config
.
hf_config
.
bias
=
True
vllm_config
.
model_config
.
hf_config
.
mlp_bias
=
True
hf_config
.
bias
=
True
hf_config
.
mlp_bias
=
True
super
().
__init__
(
vllm_config
=
vllm_config
,
prefix
=
prefix
)
# 2. Remove the bias from the qkv_proj and gate_up_proj based on config
# Telechat2's gate_up_proj and qkv_proj don't have bias
...
...
vllm/transformers_utils/config.py
View file @
004203e9
...
...
@@ -34,8 +34,8 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DeepseekVLV2Config,
KimiVLConfig
,
MedusaConfig
,
MllamaConfig
,
MLPSpeculatorConfig
,
Nemotron_Nano_VL_Config
,
NemotronConfig
,
RW
Config
,
UltravoxConfig
)
NemotronConfig
,
NVLM_D_
Config
,
RWConfig
,
UltravoxConfig
)
# yapf: enable
from
vllm.transformers_utils.configs.mistral
import
adapt_config_dict
from
vllm.transformers_utils.utils
import
check_gguf_file
...
...
@@ -81,6 +81,7 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
"medusa"
:
MedusaConfig
,
"eagle"
:
EAGLEConfig
,
"nemotron"
:
NemotronConfig
,
"NVLM_D"
:
NVLM_D_Config
,
"ultravox"
:
UltravoxConfig
,
**
_CONFIG_REGISTRY_OVERRIDE_HF
}
...
...
vllm/transformers_utils/configs/__init__.py
View file @
004203e9
...
...
@@ -23,6 +23,7 @@ from vllm.transformers_utils.configs.moonvit import MoonViTConfig
from
vllm.transformers_utils.configs.nemotron
import
NemotronConfig
from
vllm.transformers_utils.configs.nemotron_h
import
NemotronHConfig
from
vllm.transformers_utils.configs.nemotron_vl
import
Nemotron_Nano_VL_Config
from
vllm.transformers_utils.configs.nvlm_d
import
NVLM_D_Config
from
vllm.transformers_utils.configs.ultravox
import
UltravoxConfig
__all__
=
[
...
...
@@ -39,5 +40,6 @@ __all__ = [
"NemotronConfig"
,
"NemotronHConfig"
,
"Nemotron_Nano_VL_Config"
,
"NVLM_D_Config"
,
"UltravoxConfig"
,
]
vllm/transformers_utils/configs/nvlm_d.py
0 → 100644
View file @
004203e9
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# Adapted from
# https://huggingface.co/nvidia/NVLM-D-72B/blob/main/configuration_nvlm_d.py
# --------------------------------------------------------
# NVLM-D
# Copyright (c) 2024 NVIDIA
# Licensed under Apache 2.0 License [see LICENSE for details]
# --------------------------------------------------------
from
transformers
import
Qwen2Config
from
transformers.configuration_utils
import
PretrainedConfig
class NVLM_D_Config(PretrainedConfig):
    """Composite config bundling a vision sub-config and a text sub-config.

    Args:
        vision_config: Mapping of keyword arguments unpacked into
            ``PretrainedConfig``; ``None`` is treated as an empty mapping.
            The resulting object is stored as ``vision_config``.
        llm_config: Mapping of keyword arguments unpacked into
            ``Qwen2Config``; ``None`` is treated as an empty mapping.
            The resulting object is stored as ``text_config``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = 'NVLM_D'
    is_composition = True

    def __init__(self, vision_config=None, llm_config=None, **kwargs):
        super().__init__(**kwargs)

        # Fall back to empty keyword sets when a sub-config is not supplied.
        vision_kwargs = {} if vision_config is None else vision_config
        llm_kwargs = {} if llm_config is None else llm_config

        # Build the vision config first, then the text config.
        self.vision_config = PretrainedConfig(**vision_kwargs)
        self.text_config = Qwen2Config(**llm_kwargs)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment