registry.py 35.3 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from collections.abc import Mapping, Set
5
from dataclasses import dataclass, field
6
from typing import Any, Literal, Optional
7

zhuwenwen's avatar
zhuwenwen committed
8
import os
9
10
11
import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
zhuwenwen's avatar
zhuwenwen committed
12
# from ..utils import models_path_prefix
13

zhuwenwen's avatar
zhuwenwen committed
14
# Local directory that holds mirrored model checkpoints. The first non-empty
# value of VLLM_OPTEST_MODELS_PATH / OPTEST_MODELS_PATH wins. When neither is
# set we fall back to "" so that `os.path.join(models_path_prefix, name)`
# yields `name` unchanged instead of raising TypeError on a `None` prefix.
models_path_prefix = (os.getenv('VLLM_OPTEST_MODELS_PATH')
                      or os.getenv("OPTEST_MODELS_PATH")
                      or "")
15

16
17
from vllm.config import TokenizerMode

zhuwenwen's avatar
zhuwenwen committed
18

19
20
21
22
23
24
25
26
27
28
29
@dataclass(frozen=True)
class _HfExamplesInfo:
    """Example checkpoint(s) and test constraints for one architecture."""

    default: str
    """The default model to use for testing this architecture."""

    extras: Mapping[str, str] = field(default_factory=dict)
    """Extra models to use for testing this architecture."""

    tokenizer: Optional[str] = None
    """Set the tokenizer to load for this architecture."""

    tokenizer_mode: TokenizerMode = "auto"
    """Set the tokenizer type for this architecture."""

    speculative_model: Optional[str] = None
    """
    The default model to use for testing this architecture, which is only used
    for speculative decoding.
    """

    min_transformers_version: Optional[str] = None
    """
    The minimum version of HF Transformers that is required to run this model.
    """

    max_transformers_version: Optional[str] = None
    """
    The maximum version of HF Transformers that this model runs on.
    """

    transformers_version_reason: Optional[str] = None
    """
    The reason for the minimum/maximum version requirement.
    """

    is_available_online: bool = True
    """
    Set this to ``False`` if the name of this architecture no longer exists on
    the HF repo. To maintain backwards compatibility, we have not removed them
    from the main model registry, so without this flag the registry tests will
    fail.
    """

    trust_remote_code: bool = False
    """The ``trust_remote_code`` level required to load the model."""

    v0_only: bool = False
    """The model is only available with the vLLM V0 engine."""

    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The ``hf_overrides`` required to load the model."""

    max_model_len: Optional[int] = None
    """
    The maximum model length to use for this model. Some models default to a
    length that is too large to fit into memory in CI.
    """

    def check_transformers_version(
        self,
        *,
        on_fail: Literal["error", "skip"],
    ) -> None:
        """
        If the installed transformers version does not meet the requirements,
        perform the given action.

        Args:
            on_fail: ``"error"`` raises ``RuntimeError``; any other value
                skips the running test through ``pytest.skip``.

        Raises:
            RuntimeError: If the version check fails and ``on_fail`` is
                ``"error"``.
        """
        min_version = self.min_transformers_version
        max_version = self.max_transformers_version
        if min_version is None and max_version is None:
            # No constraint declared for this model.
            return

        current_version = TRANSFORMERS_VERSION
        # Only check the base version for the min/max version, otherwise preview
        # models cannot be run because `x.yy.0.dev0`<`x.yy.0`
        cur_base_version = Version(Version(current_version).base_version)

        msg = f"`transformers=={current_version}` installed, but `transformers"
        if min_version and cur_base_version < Version(min_version):
            msg += f">={min_version}` is required to run this model."
        elif max_version and cur_base_version > Version(max_version):
            msg += f"<={max_version}` is required to run this model."
        else:
            # Installed version is inside the allowed range.
            return

        if self.transformers_version_reason:
            msg += f" Reason: {self.transformers_version_reason}"

        if on_fail == "error":
            raise RuntimeError(msg)
        else:
            pytest.skip(msg)

    def check_available_online(
        self,
        *,
        on_fail: Literal["error", "skip"],
    ) -> None:
        """
        If the model is not available online, perform the given action.

        Args:
            on_fail: ``"error"`` raises ``RuntimeError``; any other value
                skips the running test through ``pytest.skip``.

        Raises:
            RuntimeError: If the model is flagged as unavailable and
                ``on_fail`` is ``"error"``.
        """
        if not self.is_available_online:
            msg = "Model is not available online"

            if on_fail == "error":
                raise RuntimeError(msg)
            else:
                pytest.skip(msg)

128
129
130
131

# yapf: disable
# Maps each text-generation architecture name to the example checkpoint(s)
# used to test it. Every model name is joined onto `models_path_prefix`.
_TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AquilaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat-7B"),
                                   trust_remote_code=True),
    "AquilaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat2-7B"),
                                         trust_remote_code=True),
    "ArcticForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-instruct"),
                                         trust_remote_code=True),
    "BaiChuanForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B"),
                                         trust_remote_code=True),
    "BaichuanForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baichuan-inc/Baichuan2-7B-chat"),
                                         trust_remote_code=True),
    "BambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"ibm-ai-platform/Bamba-9B"),
                                        extras={"tiny": os.path.join(models_path_prefix,"hmellor/tiny-random-BambaForCausalLM")}),  # noqa: E501
    "BloomForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"bigscience/bloom-560m"),
                                        {"1b": os.path.join(models_path_prefix,"bigscience/bloomz-1b1")}),
    "ChatGLMModel": _HfExamplesInfo(os.path.join(models_path_prefix, "THUDM/chatglm3-6b"),
                                    trust_remote_code=True,
                                    max_transformers_version="4.48"),
    "ChatGLMForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "thu-coai/ShieldLM-6B-chatglm3"),  # noqa: E501
                                                       trust_remote_code=True),
    "CohereForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "CohereForAI/c4ai-command-r-v01"),
                                         trust_remote_code=True),
    "Cohere2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "CohereForAI/c4ai-command-r7b-12-2024"), # noqa: E501
                                         trust_remote_code=True),
    "DbrxForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "databricks/dbrx-instruct")),
    "DeciLMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/Llama-3_3-Nemotron-Super-49B-v1"), # noqa: E501
                                         trust_remote_code=True),
    "DeepseekForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/deepseek-llm-7b-chat")),
    "DeepseekV2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V2-Lite-Chat"),  # noqa: E501
                                         trust_remote_code=True),
    "DeepseekV3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3"),  # noqa: E501
                                         trust_remote_code=True),
    "ExaoneForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct")),  # noqa: E501
    "Fairseq2LlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"mgleize/fairseq2-dummy-Llama-3.2-1B")),  # noqa: E501
    "FalconForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"tiiuae/falcon-7b")),
    "FalconH1ForCausalLM":_HfExamplesInfo(os.path.join(models_path_prefix,"tiiuae/Falcon-H1-1.5B-Instruct"),
                                          min_transformers_version="4.53"),
    "GemmaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"google/gemma-1.1-2b-it")),
    "Gemma2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"google/gemma-2-9b")),
    "Gemma3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"google/gemma-3-1b-it")),
    "GlmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"THUDM/glm-4-9b-chat-hf")),
    "Glm4ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"THUDM/GLM-4-9B-0414")),
    "GPT2LMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix,"openai-community/gpt2"),
                                       {"alias": os.path.join(models_path_prefix,"gpt2")}),
    "GPTBigCodeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"bigcode/starcoder"),
                                             {"tiny": os.path.join(models_path_prefix,"bigcode/tiny_starcoder_py")}),  # noqa: E501
    "GPTJForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"Milos/slovak-gpt-j-405M"),
                                       {"6b": os.path.join(models_path_prefix,"EleutherAI/gpt-j-6b")}),
    "GPTNeoXForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"EleutherAI/pythia-70m"),
                                          {"1b": os.path.join(models_path_prefix,"EleutherAI/pythia-1.4b")}),
    "GraniteForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"ibm/PowerLM-3b")),
    "GraniteMoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"ibm/PowerMoE-3b")),
    "GraniteMoeHybridForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"ibm-granite/granite-4.0-tiny-preview")),  # noqa: E501
    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"ibm-research/moe-7b-1b-active-shared-experts")),  # noqa: E501
    "Grok1ModelForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"hpcai-tech/grok-1"),
                                             trust_remote_code=True),
    "InternLMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "internlm/internlm-chat-7b"),
                                           trust_remote_code=True),
    "InternLM2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "internlm/internlm2-chat-7b"),
                                            trust_remote_code=True),
    "InternLM2VEForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "OpenGVLab/Mono-InternVL-2B"),
                                              trust_remote_code=True),
    "InternLM3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "internlm/internlm3-8b-instruct"),
                                            trust_remote_code=True),
    "JAISLMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix,"inceptionai/jais-13b-chat")),
    "JambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"ai21labs/AI21-Jamba-1.5-Mini"),
                                        extras={"tiny": os.path.join(models_path_prefix,"ai21labs/Jamba-tiny-dev")}),  # noqa: E501
    "LlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"meta-llama/Llama-3.2-1B-Instruct"),
                                        extras={"guard": os.path.join(models_path_prefix,"meta-llama/Llama-Guard-3-1B"),  # noqa: E501
                                                "hermes": os.path.join(models_path_prefix,"NousResearch/Hermes-3-Llama-3.1-8B")}),  # noqa: E501
    "LLaMAForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"decapoda-research/llama-7b-hf"),
                                        is_available_online=False),
    "MambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"state-spaces/mamba-130m-hf")),
    "Mamba2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"mistralai/Mamba-Codestral-7B-v0.1")),
    "FalconMambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"tiiuae/falcon-mamba-7b-instruct")),  # noqa: E501
    "MiniCPMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"openbmb/MiniCPM-2B-sft-bf16"),
                                         trust_remote_code=True),
    "MiniCPM3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM3-4B"),
                                         trust_remote_code=True),
    "MiniMaxText01ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01"),
                                                trust_remote_code=True),
    "MistralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mistral-7B-Instruct-v0.1")),
    "MixtralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1"),  # noqa: E501
                                          {"tiny": os.path.join(models_path_prefix, "TitanML/tiny-mixtral")}),  # noqa: E501
    "QuantMixtralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistral-community/Mixtral-8x22B-v0.1-AWQ")),  # noqa: E501
    "MptForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mpt"), is_available_online=False),
    "MPTForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mosaicml/mpt-7b")),
    "NemotronForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/Minitron-8B-Base")),
    "NemotronHForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/Nemotron-H-8B-Base-8K"),
                                            trust_remote_code=True),
    "OlmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-1B-hf")),
    "Olmo2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-2-0425-1B")),
    "OlmoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924-Instruct")),
    "OPTForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/opt-125m"),
                                      {"1b": os.path.join(models_path_prefix, "facebook/opt-iml-max-1.3b")}),
    "OrionForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "OrionStarAI/Orion-14B-Chat"),
                                        trust_remote_code=True),
    "PersimmonForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/persimmon-8b-chat")),
    "PhiForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/phi-2"), v0_only=True),
    "Phi3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-mini-4k-instruct")),
    "Phi3SmallForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-small-8k-instruct"),
                                            trust_remote_code=True,
                                            v0_only=True),
    # Joined onto `models_path_prefix` like every other entry in this table.
    "PhiMoEForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3.5-MoE-instruct"),
                                         trust_remote_code=True),
    "Plamo2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "pfnet/plamo-2-1b"),
                                        trust_remote_code=True),
    "QWenLMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat"),
                                       trust_remote_code=True),
    "Qwen2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-0.5B-Instruct"),
                                        extras={"2.5": os.path.join(models_path_prefix, "Qwen/Qwen2.5-0.5B-Instruct")}), # noqa: E501
    "Qwen2MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen1.5-MoE-A2.7B-Chat")),
    "Qwen3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen3-8B")),
    "Qwen3MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B")),
    "RWForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-40b")),
    "StableLMEpochForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-zephyr-3b"),  # noqa: E501
                                                v0_only=True),
    "StableLmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-3b-4e1t"),
                                           v0_only=True),
    "Starcoder2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "bigcode/starcoder2-3b")),
    "SolarForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "upstage/solar-pro-preview-instruct")),
    "TeleChat2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Tele-AI/TeleChat2-3B"),
                                            trust_remote_code=True),
    "TeleFLMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "CofeAI/FLM-2-52B-Instruct-2407"),
                                            trust_remote_code=True),
    "XverseForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "xverse/XVERSE-7B-Chat"),
                                         tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-2-7b"),
                                         trust_remote_code=True),
    "Zamba2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Zyphra/Zamba2-7B-instruct")),
    "MiMoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
                                        trust_remote_code=True),
    # [Encoder-decoder]
    "BartModel": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/bart-base")),
    "BartForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/bart-large-cnn")),
}

# Maps each embedding / pooling architecture name to the example checkpoint(s)
# used to test it. Every model name is joined onto `models_path_prefix`.
_EMBEDDING_EXAMPLE_MODELS = {
    # [Text-only]
    "BertModel": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5")),
    "Gemma2Model": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-multilingual-gemma2")),
    "GritLM": _HfExamplesInfo(os.path.join(models_path_prefix, "parasail-ai/GritLM-7B-vllm")),
    "GteModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-v2.0"),
                                               trust_remote_code=True),
    "GteNewModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Alibaba-NLP/gte-base-en-v1.5"),
                                   trust_remote_code=True,
                                   hf_overrides={"architectures": ["GteNewModel"]}),  # noqa: E501
    "InternLM2ForRewardModel": _HfExamplesInfo(os.path.join(models_path_prefix, "internlm/internlm2-1_8b-reward"),
                                               trust_remote_code=True),
    "JambaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-reward-dev")),  # noqa: E501
    # `is_available_online` is an argument of `_HfExamplesInfo`, not of
    # `os.path.join`; the join must be closed before it.
    "LlamaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "llama"), is_available_online=False),
    "MistralModel": _HfExamplesInfo(os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")),
    "ModernBertModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Alibaba-NLP/gte-modernbert-base"),
                                trust_remote_code=True),
    "NomicBertModel": _HfExamplesInfo(os.path.join(models_path_prefix, "nomic-ai/nomic-embed-text-v2-moe"),
                                               trust_remote_code=True),
    "Qwen2Model": _HfExamplesInfo(os.path.join(models_path_prefix, "ssmits/Qwen2-7B-Instruct-embed-base")),
    "Qwen2ForRewardModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-RM-72B")),
    "Qwen2ForProcessRewardModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-PRM-7B")),
    "Qwen2ForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "jason9693/Qwen2.5-1.5B-apeach")),  # noqa: E501
    "RobertaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "sentence-transformers/stsb-roberta-base-v2")),  # noqa: E501
    "RobertaForMaskedLM": _HfExamplesInfo(os.path.join(models_path_prefix, "sentence-transformers/all-roberta-large-v1")),  # noqa: E501
    "XLMRobertaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "intfloat/multilingual-e5-small")),
    # [Multimodal]
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "royokong/e5-v")),
    "Phi3VForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full"),
                                         trust_remote_code=True),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "MrLight/dse-qwen2-2b-mrl-v1")), # noqa: E501
    "PrithviGeoSpatialMAE": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"), # noqa: E501
                                            is_available_online=False),  # noqa: E501
}

302
303
# Maps each cross-encoder architecture name to the example checkpoint used to
# test it. Every model name is joined onto `models_path_prefix`.
_CROSS_ENCODER_EXAMPLE_MODELS = {
    # [Text-only]
    "BertForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "cross-encoder/ms-marco-MiniLM-L-6-v2")),  # noqa: E501
    "RobertaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "cross-encoder/quora-roberta-base")),  # noqa: E501
    "XLMRobertaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-m3")),  # noqa: E501
    "ModernBertForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "Alibaba-NLP/gte-reranker-modernbert-base")),  # noqa: E501
}

310
311
_MULTIMODAL_EXAMPLE_MODELS = {
    # [Decoder-only]
zhuwenwen's avatar
zhuwenwen committed
312
313
314
315
    "AriaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix,"rhymes-ai/Aria")),
    "AyaVisionForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix,"CohereForAI/aya-vision-8b")), # noqa: E501
    "Blip2ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix,"Salesforce/blip2-opt-2.7b"),  # noqa: E501
                                                     extras={"6b": os.path.join(models_path_prefix,"Salesforce/blip2-opt-6.7b")},  # noqa: E501
316
                                                     v0_only=True),
zhuwenwen's avatar
zhuwenwen committed
317
318
319
    "ChameleonForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix,"facebook/chameleon-7b")),  # noqa: E501
    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"deepseek-ai/deepseek-vl2-tiny"),  # noqa: E501
                                                extras={"fork": os.path.join(models_path_prefix,"Isotr0py/deepseek-vl2-tiny")},  # noqa: E501
320
321
                                                max_transformers_version="4.48",  # noqa: E501
                                                transformers_version_reason="HF model is not compatible.",  # noqa: E501
322
                                                hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}),  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
323
324
325
326
    "FuyuForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"adept/fuyu-8b")),
    "Gemma3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix,"google/gemma-3-4b-it")),
    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix,"ibm-granite/granite-speech-3.3-2b")),  # noqa: E501
    "GLM4VForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"THUDM/glm-4v-9b"),
327
328
                                        trust_remote_code=True,
                                        hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
329
330
    "H2OVLChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-800m"),
                                      extras={"2b": os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-2b")},  # noqa: E501
331
332
                                      max_transformers_version="4.48",  # noqa: E501
                                      transformers_version_reason="HF model is not compatible."),  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
333
    "InternVLChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
zhuwenwen's avatar
zhuwenwen committed
334
335
                                         extras={"2B": os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
                                                 "3.0": os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B")},  # noqa: E501
336
                                         trust_remote_code=True),
zhuwenwen's avatar
zhuwenwen committed
337
338
339
340
    "Idefics3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3"),  # noqa: E501
                                                        {"tiny": os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM-256M-Instruct")}),  # noqa: E501
    "KimiVLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Instruct"),  # noqa: E501
                                                      extras={"thinking": os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Thinking")},  # noqa: E501
341
342
                                                      trust_remote_code=True,
                                                      v0_only=True),
zhuwenwen's avatar
zhuwenwen committed
343
    "Llama4ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),   # noqa: E501
344
                                                      max_model_len=10240),
zhuwenwen's avatar
zhuwenwen committed
345
346
347
348
349
350
351
    "LlavaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
                                                     extras={"mistral": os.path.join(models_path_prefix, "mistral-community/pixtral-12b"), # noqa: E501
                                                             "mistral-fp8": os.path.join(models_path_prefix, "nm-testing/pixtral-12b-FP8-dynamic")}),  # noqa: E501
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")),  # noqa: E501
    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")),  # noqa: E501
    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")),  # noqa: E501
    "MantisForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "TIGER-Lab/Mantis-8B-siglip-llama3"),  # noqa: E501
352
353
                                                      max_transformers_version="4.48",  # noqa: E501
                                                      transformers_version_reason="HF model is not compatible.",  # noqa: E501
354
                                                      hf_overrides={"architectures": ["MantisForConditionalGeneration"]}),  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
355
    "MiniCPMO": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM-o-2_6"),
356
                                trust_remote_code=True),
zhuwenwen's avatar
zhuwenwen committed
357
358
    "MiniCPMV": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5"),
                                extras={"2.6": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-2_6")},  # noqa: E501
359
                                trust_remote_code=True),
zhuwenwen's avatar
zhuwenwen committed
360
    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-VL-01"), # noqa: E501
361
362
                                              trust_remote_code=True,
                                              v0_only=True),
zhuwenwen's avatar
zhuwenwen committed
363
364
365
    "Mistral3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mistral-Small-3.1-24B-Instruct-2503"),  # noqa: E501
                                                        extras={"fp8": os.path.join(models_path_prefix, "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic")}),  # noqa: E501
    "MolmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/Molmo-7B-D-0924"),
366
                                        max_transformers_version="4.48",
367
                                        transformers_version_reason="Incorrectly-detected `tensorflow` import.",  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
368
                                        extras={"olmo": os.path.join(models_path_prefix, "allenai/Molmo-7B-O-0924")},  # noqa: E501
369
                                        trust_remote_code=True),
zhuwenwen's avatar
zhuwenwen committed
370
    "NVLM_D": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/NVLM-D-72B"),
371
                              trust_remote_code=True),
zhuwenwen's avatar
zhuwenwen committed
372
373
374
    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/paligemma-3b-mix-224"),  # noqa: E501
                                                         extras={"v2": os.path.join(models_path_prefix, "google/paligemma2-3b-ft-docci-448")}),  # noqa: E501
    "Phi3VForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-vision-128k-instruct"),
375
                                        trust_remote_code=True,
376
377
                                        max_transformers_version="4.48",
                                        transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
378
379
380
381
382
                              extras={"phi3.5": os.path.join(models_path_prefix,"microsoft/Phi-3.5-vision-instruct"})),  # noqa: E501
    "Ovis": _HfExamplesInfo(os.path.join(models_path_prefix,"AIDC-AI/Ovis2-1B"), trust_remote_code=True,
                            extras={"1.6-llama": os.path.join(models_path_prefix,"AIDC-AI/Ovis1.6-Llama3.2-3B"),
                                    "1.6-gemma": os.path.join(models_path_prefix,"AIDC-AI/Ovis1.6-Gemma2-9B")}),  # noqa: E501
    "Phi4MMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix,"microsoft/Phi-4-multimodal-instruct"),
383
                                        trust_remote_code=True),
zhuwenwen's avatar
zhuwenwen committed
384
    "PixtralForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409"),  # noqa: E501
385
                                                       tokenizer_mode="mistral"),
zhuwenwen's avatar
zhuwenwen committed
386
387
    "QwenVLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen-VL"),
                                                      extras={"chat": os.path.join(models_path_prefix, "Qwen/Qwen-VL-Chat")},  # noqa: E501
388
389
                                                      trust_remote_code=True,
                                                      hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]}),  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
390
391
392
    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-Audio-7B-Instruct")),  # noqa: E501
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")),  # noqa: E501
    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-3B-Instruct")),  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
393
394
    "Qwen2_5OmniModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-3B")),
    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")),  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
395
396
397
    "SkyworkR1VChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B")),
    "SmolVLMForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")),  # noqa: E501
    "UltravoxModel": _HfExamplesInfo(os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),  # noqa: E501
398
                                     trust_remote_code=True),
汪志鹏's avatar
汪志鹏 committed
399
400
    "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b",  # noqa: E501
                                                        hf_overrides={"architectures": ["TarsierForConditionalGeneration"]}),  # noqa: E501
401
    # [Encoder-decoder]
402
403
    # Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
    # Therefore, we borrow the BartTokenizer from the original Bart model
zhuwenwen's avatar
zhuwenwen committed
404
405
    "Florence2ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix,"microsoft/Florence-2-base"),  # noqa: E501
                                                         tokenizer=os.path.join(models_path_prefix,"Isotr0py/Florence-2-tokenizer"),  # noqa: E501
406
                                                         trust_remote_code=True),  # noqa: E501
zhuwenwen's avatar
zhuwenwen committed
407
408
409
    "MllamaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "meta-llama/Llama-3.2-11B-Vision-Instruct")),  # noqa: E501
    "Llama4ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct")),  # noqa: E501
    "WhisperForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "openai/whisper-large-v3")),  # noqa: E501
410
411
412
}

# Example checkpoints for speculative-decoding architectures, keyed by
# architecture name. The positional argument is the target model and
# ``speculative_model`` is the draft/speculator checkpoint paired with it.
# Every checkpoint is addressed as a path under ``models_path_prefix``.
# NOTE(review): ``models_path_prefix`` is read from the environment at the top
# of this file; if it is unset (None), ``os.path.join`` raises ``TypeError``
# while this table is being built.
_SPECULATIVE_DECODING_EXAMPLE_MODELS = {
    "EAGLEModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-68m"),
        speculative_model=os.path.join(
            models_path_prefix, "abhigoyal/vllm-eagle-llama-68m-random"),
    ),
    "MedusaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-68m"),
        speculative_model=os.path.join(
            models_path_prefix, "abhigoyal/vllm-medusa-llama-68m-random"),
    ),
    "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-160m"),
        speculative_model=os.path.join(
            models_path_prefix, "ibm-ai-platform/llama-160m-accelerator"),
    ),
    "DeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "luccafong/deepseek_mtp_main_random"),
        speculative_model=os.path.join(
            models_path_prefix, "luccafong/deepseek_mtp_draft_random"),
        trust_remote_code=True,
    ),
    "EagleLlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"),
        tokenizer=os.path.join(
            models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
    ),
    "Eagle3LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix,
                     "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"),
        tokenizer=os.path.join(
            models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
    ),
    "EagleMiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-1B-sft-bf16"),
        trust_remote_code=True,
        is_available_online=False,
        speculative_model=os.path.join(
            models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
        tokenizer=os.path.join(
            models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
    ),
    "MiMoMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
    ),
}

# Example checkpoint registered under the ``TransformersForCausalLM``
# architecture name, addressed as a path under ``models_path_prefix``.
_TRANSFORMERS_MODELS = {
    "TransformersForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ArthurZ/Ilama-3.2-1B"),
        trust_remote_code=True,
    ),
}

# Union of all per-category example tables, keyed by architecture name.
# Tables are unpacked in order, so if the same architecture name appears in
# more than one table the entry from the later table is the one kept.
_EXAMPLE_MODELS = {
    **_TEXT_GENERATION_EXAMPLE_MODELS,
    **_EMBEDDING_EXAMPLE_MODELS,
    **_CROSS_ENCODER_EXAMPLE_MODELS,
    **_MULTIMODAL_EXAMPLE_MODELS,
    **_SPECULATIVE_DECODING_EXAMPLE_MODELS,
    **_TRANSFORMERS_MODELS,
}


class HfExampleModels:
    """Read-only lookup helper over a table of example models.

    The table maps an architecture name to the ``_HfExamplesInfo`` entry that
    describes the checkpoint(s) used to exercise that architecture.
    """

    def __init__(self, hf_models: Mapping[str, "_HfExamplesInfo"]) -> None:
        """Store ``hf_models`` as-is; the mapping is neither copied nor
        validated."""
        super().__init__()

        self.hf_models = hf_models

    def get_supported_archs(self) -> Set[str]:
        """Return the architecture names present in the table.

        The result is the mapping's own key view, not a copy.
        """
        return self.hf_models.keys()

    def get_hf_info(self, model_arch: str) -> "_HfExamplesInfo":
        """Return the entry registered for ``model_arch``.

        Raises:
            KeyError: If ``model_arch`` is not in the table.
        """
        return self.hf_models[model_arch]

    def find_hf_info(self, model_id: str) -> "_HfExamplesInfo":
        """Return the first entry that lists ``model_id`` as one of its models.

        Matching is by exact string equality against the stored values, so
        ``model_id`` must be spelled exactly as it was registered (including
        any path prefix that was joined onto it).

        Raises:
            ValueError: If no entry has ``model_id`` as its default or among
                its extras.
        """
        # Default models take priority: scan all of them before any extras so
        # that an id which is one entry's default and another entry's extra
        # resolves to the former.
        for info in self.hf_models.values():
            if info.default == model_id:
                return info

        # Fallback to extras
        for info in self.hf_models.values():
            if any(extra == model_id for extra in info.extras.values()):
                return info

        raise ValueError(f"No example model defined for {model_id}")

# Module-level registry built from the merged example-model table.
HF_EXAMPLE_MODELS = HfExampleModels(_EXAMPLE_MODELS)