"vscode:/vscode.git/clone" did not exist on "68311891f5036a4faac89ead7dd40826d18da0b1"
registry.py 50.2 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from collections.abc import Mapping, Set
5
from dataclasses import dataclass, field
6
from typing import Any, Literal
7

zhuwenwen's avatar
zhuwenwen committed
8
import os
9
10
11
import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
zhuwenwen's avatar
zhuwenwen committed
12
# from ..utils import models_path_prefix
13

zhuwenwen's avatar
zhuwenwen committed
14
# Root directory that the example model names in this file are joined onto.
# Falls back to an empty string when neither environment variable is set:
# `os.path.join("", name)` returns `name` unchanged, whereas joining onto
# `None` raises `TypeError` while this module is being imported.
models_path_prefix = (
    os.getenv("VLLM_OPTEST_MODELS_PATH") or os.getenv("OPTEST_MODELS_PATH") or ""
)
15

16
from vllm.config.model import ModelDType, TokenizerMode
17

zhuwenwen's avatar
zhuwenwen committed
18

19
20
21
22
23
24
25
26
@dataclass(frozen=True)
class _HfExamplesInfo:
    """
    Example model(s) and loading options used to test one architecture.

    Instances are immutable (`frozen=True`). Besides the plain settings, the
    class offers two guards that raise, skip the current test, or report a
    message when a precondition for running the example is not met.
    """

    default: str
    """The default model to use for testing this architecture."""

    extras: Mapping[str, str] = field(default_factory=dict)
    """Extra models to use for testing this architecture."""

    tokenizer: str | None = None
    """Set the tokenizer to load for this architecture."""

    tokenizer_mode: TokenizerMode | str = "auto"
    """Set the tokenizer type for this architecture."""

    speculative_model: str | None = None
    """
    The default model to use for testing this architecture, which is only used
    for speculative decoding.
    """

    speculative_method: str | None = None
    """
    The method to use for speculative decoding.
    """

    min_transformers_version: str | None = None
    """
    The minimum version of HF Transformers that is required to run this model.
    """

    max_transformers_version: str | None = None
    """
    The maximum version of HF Transformers that this model runs on.
    """

    transformers_version_reason: str | None = None
    """
    The reason for the minimum/maximum version requirement.
    """

    require_embed_inputs: bool = False
    """
    If `True`, enables prompt and multi-modal embedding inputs while
    disabling tokenization.
    """

    dtype: ModelDType = "auto"
    """
    The data type for the model weights and activations.
    """

    enforce_eager: bool = False
    """
    Whether to enforce eager execution. If True, we will
    disable CUDA graph and always execute the model in eager mode.
    If False, we will use CUDA graph and eager execution in hybrid.
    """

    is_available_online: bool = True
    """
    Set this to `False` if the name of this architecture no longer exists on
    the HF repo. To maintain backwards compatibility, we have not removed them
    from the main model registry, so without this flag the registry tests will
    fail.
    """

    trust_remote_code: bool = False
    """The `trust_remote_code` level required to load the model."""

    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The `hf_overrides` required to load the model."""

    max_model_len: int | None = None
    """
    The maximum model length to use for this model. Some models default to a
    length that is too large to fit into memory in CI.
    """

    revision: str | None = None
    """
    The specific revision (commit hash, tag, or branch) to use for the model.
    If not specified, the default revision will be used.
    """

    max_num_seqs: int | None = None
    """Maximum number of sequences to be processed in a single iteration."""

    use_original_num_layers: bool = False
    """
    If True, use the original number of layers from the model config
    instead of minimal layers for testing.
    """

    def check_transformers_version(
        self,
        *,
        on_fail: Literal["error", "skip", "return"],
        check_min_version: bool = True,
        check_max_version: bool = True,
    ) -> str | None:
        """
        If the installed transformers version does not meet the requirements,
        perform the given action.

        Args:
            on_fail: `"error"` raises `RuntimeError`, `"skip"` calls
                `pytest.skip`, and any other value returns the message.
            check_min_version: Whether `min_transformers_version` is enforced.
            check_max_version: Whether `max_transformers_version` is enforced.

        Returns:
            `None` when no bound is configured or every checked bound is
            satisfied; otherwise the message describing the violated bound.

        Raises:
            RuntimeError: If a bound is violated and `on_fail == "error"`.
        """
        if (
            self.min_transformers_version is None
            and self.max_transformers_version is None
        ):
            return None

        current_version = TRANSFORMERS_VERSION
        cur_base_version = Version(current_version).base_version
        min_version = self.min_transformers_version
        max_version = self.max_transformers_version
        msg = f"`transformers=={current_version}` installed, but `transformers"
        # Only check the base version for the min/max version, otherwise preview
        # models cannot be run because `x.yy.0.dev0`<`x.yy.0`
        if (
            check_min_version
            and min_version
            and Version(cur_base_version) < Version(min_version)
        ):
            msg += f">={min_version}` is required to run this model."
        elif (
            check_max_version
            and max_version
            and Version(cur_base_version) > Version(max_version)
        ):
            msg += f"<={max_version}` is required to run this model."
        else:
            return None

        if self.transformers_version_reason:
            msg += f" Reason: {self.transformers_version_reason}"

        if on_fail == "error":
            raise RuntimeError(msg)
        elif on_fail == "skip":
            pytest.skip(msg)

        return msg

    def check_available_online(
        self,
        *,
        on_fail: Literal["error", "skip"],
    ) -> None:
        """
        If the model is not available online, perform the given action.

        Args:
            on_fail: `"error"` raises `RuntimeError`; any other value calls
                `pytest.skip`.

        Raises:
            RuntimeError: If the model is flagged as unavailable and
                `on_fail == "error"`.
        """
        if not self.is_available_online:
            msg = "Model is not available online"

            if on_fail == "error":
                raise RuntimeError(msg)
            else:
                pytest.skip(msg)

177
178
179

# Maps each text-generation architecture name to the example model(s) used to
# test it. Every model name is joined onto `models_path_prefix`.
_TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AfmoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "arcee-ai/Trinity-Nano-Preview")),
    "ApertusForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "swiss-ai/Apertus-8B-Instruct-2509")),
    "AquilaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat-7B"), trust_remote_code=True),
    "AquilaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat2-7B"), trust_remote_code=True),
    "ArceeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "arcee-ai/AFM-4.5B-Base")),
    "ArcticForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-instruct"), trust_remote_code=True
    ),
    "BaiChuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B"), trust_remote_code=True
    ),
    "BaichuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan2-7B-chat"), trust_remote_code=True
    ),
    "BailingMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-lite-1.5"), trust_remote_code=True
    ),
    "BailingMoeV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-mini-2.0"), trust_remote_code=True
    ),
    "BambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-ai-platform/Bamba-9B-v1"),
        extras={"tiny": os.path.join(models_path_prefix, "hmellor/tiny-random-BambaForCausalLM")},
    ),
    # The default is joined onto the prefix like every other entry here.
    "BloomForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigscience/bloom-560m"),
        {"1b": os.path.join(models_path_prefix, "bigscience/bloomz-1b1")},
    ),
    "ChatGLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/chatglm3-6b"), trust_remote_code=True, max_transformers_version="4.48"
    ),
    "ChatGLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "thu-coai/ShieldLM-6B-chatglm3"),
        trust_remote_code=True,
    ),
    "CohereForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r-v01"), trust_remote_code=True
    ),
    "Cohere2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r7b-12-2024"),
        trust_remote_code=True,
    ),
    "CwmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/cwm"), min_transformers_version="4.58"),
    "DbrxForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "databricks/dbrx-instruct")),
    "DeciLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3_3-Nemotron-Super-49B-v1"),
        trust_remote_code=True,
    ),
    "DeepseekForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-moe-16b-base"),
        trust_remote_code=True,
    ),
    "DeepseekV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V2-Lite-Chat"),
        trust_remote_code=True,
    ),
    "DeepseekV3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3"),
        trust_remote_code=True,
    ),
    "DeepseekV32ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3.2-Exp")),
    "Ernie4_5ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baidu/ERNIE-4.5-0.3B-PT")),
    "Ernie4_5_MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT")),
    "ExaoneForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"), trust_remote_code=True
    ),
    "Exaone4ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-4.0-32B")),
    "Fairseq2LlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mgleize/fairseq2-dummy-Llama-3.2-1B")),
    "FalconForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-7b")),
    "FalconH1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/Falcon-H1-0.5B-Base")),
    "FlexOlmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/Flex-reddit-2x7B-1T")),
    "GemmaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")),
    "Gemma2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-2-9b"), extras={"tiny": os.path.join(models_path_prefix, "google/gemma-2-2b-it")}
    ),
    "Gemma3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-1b-it")),
    "Gemma3nForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")),
    "GlmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/glm-4-9b-chat-hf")),
    "Glm4ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4-9B-0414")),
    "Glm4MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.5")),
    "GPT2LMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "openai-community/gpt2"), {"alias": os.path.join(models_path_prefix, "gpt2")}),
    "GPTBigCodeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder"),
        extras={
            "tiny": os.path.join(models_path_prefix, "bigcode/tiny_starcoder_py"),
            "santacoder": os.path.join(models_path_prefix, "bigcode/gpt_bigcode-santacoder"),
        },
    ),
    "GPTJForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Milos/slovak-gpt-j-405M"), {"6b": os.path.join(models_path_prefix, "EleutherAI/gpt-j-6b")}
    ),
    "GPTNeoXForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "EleutherAI/pythia-70m"), {"1b": os.path.join(models_path_prefix, "EleutherAI/pythia-1.4b")}
    ),
    "GptOssForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "lmsys/gpt-oss-20b-bf16")),
    "GraniteForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm/PowerLM-3b")),
    "GraniteMoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm/PowerMoE-3b")),
    "GraniteMoeHybridForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-4.0-tiny-preview")
    ),
    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-research/moe-7b-1b-active-shared-experts")
    ),
    "Grok1ModelForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hpcai-tech/grok-1"), trust_remote_code=True
    ),
    "HunYuanDenseV1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tencent/Hunyuan-7B-Instruct")),
    "HunYuanMoEV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-A13B-Instruct"), trust_remote_code=True
    ),
    "InternLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm-chat-7b"), trust_remote_code=True
    ),
    "InternLM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-chat-7b"), trust_remote_code=True
    ),
    "InternLM2VEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/Mono-InternVL-2B"), trust_remote_code=True
    ),
    "InternLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm3-8b-instruct"), trust_remote_code=True
    ),
    "JAISLMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "inceptionai/jais-13b-chat")),
    "Jais2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inceptionai/Jais-2-8B-Chat"), min_transformers_version="4.58"
    ),
    "JambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/AI21-Jamba-1.5-Mini"),
        extras={
            "tiny": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-dev"),
            "random": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-random"),
        },
    ),
    "KimiLinearForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-Linear-48B-A3B-Instruct"), trust_remote_code=True
    ),
    "Lfm2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "LiquidAI/LFM2-1.2B")),
    "Lfm2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-8B-A1B"), min_transformers_version="4.58"
    ),
    "LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.2-1B-Instruct"),
        extras={
            "guard": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-3-1B"),
            "hermes": os.path.join(models_path_prefix, "NousResearch/Hermes-3-Llama-3.1-8B"),
            "fp8": os.path.join(models_path_prefix, "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"),
            "tiny": os.path.join(models_path_prefix, "hmellor/tiny-random-LlamaForCausalLM"),
        },
    ),
    "LLaMAForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "decapoda-research/llama-7b-hf"), is_available_online=False
    ),
    "Llama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "LongcatFlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"), trust_remote_code=True
    ),
    "MambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "state-spaces/mamba-130m-hf")),
    "Mamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mamba-Codestral-7B-v0.1"),
        extras={
            "random": os.path.join(models_path_prefix, "yujiepan/mamba2-codestral-v0.1-tiny-random"),
        },
    ),
    "FalconMambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-mamba-7b-instruct")),
    "MiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"), trust_remote_code=True
    ),
    "MiniCPM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM3-4B"), trust_remote_code=True
    ),
    "MiniMaxForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01-hf")),
    "MiniMaxText01ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01"),
        trust_remote_code=True,
        revision="a59aa9cbc53b9fb8742ca4e9e1531b9802b6fdc3",
    ),
    "MiniMaxM1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M1-40k"), trust_remote_code=True
    ),
    "MiniMaxM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M2"),
        trust_remote_code=True,
    ),
    "MistralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mistral-7B-Instruct-v0.1")),
    "MistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4")
    ),
    "MixtralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1"),
        {"tiny": os.path.join(models_path_prefix, "TitanML/tiny-mixtral")},
    ),
    "MptForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mpt"), is_available_online=False),
    "MPTForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mosaicml/mpt-7b")),
    "NemotronForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/Minitron-8B-Base")),
    "NemotronHForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Nemotron-H-8B-Base-8K"), trust_remote_code=True
    ),
    "OlmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-1B-hf")),
    "Olmo2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-2-0425-1B")),
    "Olmo3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/Olmo-3-7B-Instruct")),
    "OlmoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924-Instruct")),
    "OpenPanguMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "OPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/opt-125m"), {"1b": os.path.join(models_path_prefix, "facebook/opt-iml-max-1.3b")}
    ),
    "OrionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OrionStarAI/Orion-14B-Chat"), trust_remote_code=True
    ),
    "OuroForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ByteDance/Ouro-1.4B"), trust_remote_code=True),
    "PanguEmbeddedForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Embedded-7B-V1.1"), trust_remote_code=True
    ),
    "PanguUltraMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PersimmonForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/persimmon-8b-chat")),
    "PhiForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/phi-2")),
    "Phi3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-mini-4k-instruct")),
    "PhiMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3.5-MoE-instruct"), trust_remote_code=True
    ),
    "Plamo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-2-1b"),
        trust_remote_code=True,
    ),
    "Plamo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-3-nict-2b-base"),
        trust_remote_code=True,
    ),
    "QWenLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat"),
        max_transformers_version="4.53",
        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
        trust_remote_code=True,
    ),
    "Qwen2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-0.5B-Instruct"),
        extras={
            "2.5": os.path.join(models_path_prefix, "Qwen/Qwen2.5-0.5B-Instruct"),
            "2.5-1.5B": os.path.join(models_path_prefix, "Qwen/Qwen2.5-1.5B-Instruct"),
        },
    ),
    "Qwen2MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen1.5-MoE-A2.7B-Chat")),
    "Qwen3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen3-8B")),
    "Qwen3MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B")),
    "Qwen3NextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        extras={"tiny-random": os.path.join(models_path_prefix, "tiny-random/qwen3-next-moe")},
        min_transformers_version="4.56.3",
    ),
    "RWForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-40b")),
    "SeedOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance-Seed/Seed-OSS-36B-Instruct"),
        trust_remote_code=True,
    ),
    "SmolLM3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceTB/SmolLM3-3B")),
    "StableLMEpochForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-zephyr-3b")),
    "StableLmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-3b-4e1t")),
    "Starcoder2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "bigcode/starcoder2-3b")),
    "Step3TextForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stepfun-ai/step3"), trust_remote_code=True),
    "SolarForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "upstage/solar-pro-preview-instruct"), trust_remote_code=True
    ),
    "TeleChatForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "chuhac/TeleChat2-35B"), trust_remote_code=True
    ),
    "TeleChat2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Tele-AI/TeleChat2-3B"), trust_remote_code=True
    ),
    "TeleFLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CofeAI/FLM-2-52B-Instruct-2407"), trust_remote_code=True
    ),
    "XverseForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xverse/XVERSE-7B-Chat"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-2-7b"),
        trust_remote_code=True,
    ),
    "Zamba2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Zyphra/Zamba2-7B-instruct")),
    "MiMoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"), trust_remote_code=True),
    "Dots1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "rednote-hilab/dots.llm1.inst")),
}

# Maps each embedding/pooling architecture name to the example model(s) used
# to test it. Every model name is joined onto `models_path_prefix`.
_EMBEDDING_EXAMPLE_MODELS = {
    # [Text-only]
    "BertModel": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5")),
    "Gemma2Model": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-multilingual-gemma2")),
    "Gemma3TextModel": _HfExamplesInfo(os.path.join(models_path_prefix, "google/embeddinggemma-300m")),
    "GritLM": _HfExamplesInfo(os.path.join(models_path_prefix, "parasail-ai/GritLM-7B-vllm")),
    "GteModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-v2.0"), trust_remote_code=True
    ),
    "GteNewModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-base-en-v1.5"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewModel"]},
    ),
    "InternLM2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-1_8b-reward"), trust_remote_code=True
    ),
    "JambaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-reward-dev")),
    # `is_available_online` belongs to `_HfExamplesInfo`; `os.path.join` accepts
    # no keyword arguments and would raise `TypeError` if it were passed there.
    "LlamaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "llama"), is_available_online=False),
    "MistralModel": _HfExamplesInfo(os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")),
    "ModernBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-modernbert-base"), trust_remote_code=True
    ),
    "NomicBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nomic-ai/nomic-embed-text-v2-moe"), trust_remote_code=True
    ),
    "Qwen2Model": _HfExamplesInfo(os.path.join(models_path_prefix, "ssmits/Qwen2-7B-Instruct-embed-base")),
    "Qwen2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-RM-72B"),
        max_transformers_version="4.53",
        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
    ),
    "Qwen2ForProcessRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-PRM-7B"),
        max_transformers_version="4.53",
        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
    ),
    "RobertaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "sentence-transformers/stsb-roberta-base-v2")),
    "RobertaForMaskedLM": _HfExamplesInfo(os.path.join(models_path_prefix, "sentence-transformers/all-roberta-large-v1")),
    "XLMRobertaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "intfloat/multilingual-e5-small")),
    "BertSpladeSparseEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver/splade-v3"),
        hf_overrides={"architectures": ["BertSpladeSparseEmbeddingModel"]},
    ),
    # [Multimodal]
    "CLIPModel": _HfExamplesInfo(os.path.join(models_path_prefix, "openai/clip-vit-base-patch32")),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "royokong/e5-v")),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full"), trust_remote_code=True
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "MrLight/dse-qwen2-2b-mrl-v1")),
    "SiglipModel": _HfExamplesInfo(os.path.join(models_path_prefix, "google/siglip-base-patch16-224")),
    "PrithviGeoSpatialMAE": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
    "Terratorch": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
}

539
540
# Maps each sequence/token classification architecture name to the example
# model used to test it. Every model name is joined onto `models_path_prefix`.
_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "GPT2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nie3e/sentiment-polish-gpt2-small")
    ),
    # [Cross-encoder]
    "BertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/ms-marco-MiniLM-L-6-v2")
    ),
    "BertForTokenClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "boltuix/NeuroBERT-NER")),
    "GteNewForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-multilingual-reranker-base"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
    ),
    "ModernBertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-reranker-modernbert-base")
    ),
    "ModernBertForTokenClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "disham993/electrical-ner-ModernBERT-base")
    ),
    "RobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/quora-roberta-base")
    ),
    "XLMRobertaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-m3")),
}

566
567
# Maps each automatically converted classification architecture name to the
# example model used to test it. Every model name is joined onto
# `models_path_prefix`.
_AUTOMATIC_CONVERTED_MODELS = {
    # Use as_seq_cls_model for automatic conversion
    "GemmaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-gemma"),
        hf_overrides={
            "architectures": ["GemmaForSequenceClassification"],
            "classifier_from_token": ["Yes"],
            "method": "no_post_processing",
        },
    ),
    "LlamaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-Reward-V2-Llama-3.2-1B")
    ),
    "Qwen2ForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "jason9693/Qwen2.5-1.5B-apeach")),
    "Qwen3ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tomaarsen/Qwen3-Reranker-0.6B-seq-cls")
    ),
    # Joined onto the prefix like the sibling entries above.
    "Qwen3ForTokenClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bd2lcco/Qwen3-0.6B-finetuned")
    ),
}

586
587
# Example checkpoints for multimodal architectures, keyed by architecture
# name. Model paths are resolved under models_path_prefix unless noted
# otherwise; non-path values (architecture names, version strings, tokenizer
# modes) are passed through verbatim.
_MULTIMODAL_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AriaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rhymes-ai/Aria")
    ),
    "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/audio-flamingo-3-hf"),
        min_transformers_version="5.0.0.dev",
    ),
    "AyaVisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/aya-vision-8b")
    ),
    "BagelForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance-Seed/BAGEL-7B-MoT")
    ),
    "BeeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Open-Bee/Bee-8B-RL"),
        trust_remote_code=True,
    ),
    "Blip2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Salesforce/blip2-opt-2.7b"),
        extras={"6b": os.path.join(models_path_prefix, "Salesforce/blip2-opt-6.7b")},
    ),
    "ChameleonForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/chameleon-7b")
    ),
    "Cohere2VisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/command-a-vision-07-2025")
    ),
    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-vl2-tiny"),
        extras={"fork": os.path.join(models_path_prefix, "Isotr0py/deepseek-vl2-tiny")},
        max_transformers_version="4.48",
        transformers_version_reason="HF model is not compatible.",
        hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
    ),
    "DeepseekOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-OCR"),
    ),
    "DotsOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.ocr"),
        trust_remote_code=True,
    ),
    "Emu3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")
    ),
    "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-VL-28B-A3B-PT"),
        trust_remote_code=True,
    ),
    "FuyuForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "adept/fuyu-8b")
    ),
    "Gemma3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
    "Gemma3nForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")
    ),
    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-speech-3.3-2b")
    ),
    "GLM4VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4v-9b"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GLM4VForCausalLM"]},
    ),
    "Glm4vForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.1V-9B-Thinking")
    ),
    "Glm4vMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5V")
    ),
    "H2OVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-800m"),
        trust_remote_code=True,
        extras={"2b": os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-2b")},
        max_transformers_version="4.48",
        transformers_version_reason="HF model is not compatible.",
    ),
    "HCXVisionForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
        ),
        trust_remote_code=True,
    ),
    "HunYuanVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/HunyuanOCR"),
        hf_overrides={"num_experts": 0},
    ),
    "Idefics3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3"),
        extras={
            "tiny": os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM-256M-Instruct"),
        },
    ),
    "InternS1ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/Intern-S1"),
        trust_remote_code=True,
    ),
    "InternVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
        extras={
            "2B": os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
            "3.0": os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B"),
            "3.5-qwen3": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-1B"),
            "3.5-qwen3moe": os.path.join(
                models_path_prefix, "OpenGVLab/InternVL3_5-30B-A3B"
            ),
            "3.5-gptoss": os.path.join(
                models_path_prefix, "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"
            ),
        },
        trust_remote_code=True,
    ),
    "InternVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B-hf")
    ),
    "KeyeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-8B-Preview"),
        trust_remote_code=True,
    ),
    "KeyeVL1_5ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-1_5-8B"),
        trust_remote_code=True,
    ),
    "KimiVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Instruct"),
        extras={
            "thinking": os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Thinking"),
        },
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason="HF model uses deprecated transformers API "
        "(PytorchGELUTanh, DynamicCache.seen_tokens, and more). See: "
        "https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/31",
    ),
    "LightOnOCRForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lightonai/LightOnOCR-1B-1025")
    ),
    "Llama4ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
        max_model_len=10240,
        extras={
            "llama-guard-4": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-4-12B"),
        },
    ),
    "LlavaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
        extras={
            "mistral": os.path.join(models_path_prefix, "mistral-community/pixtral-12b"),
            "mistral-fp8": os.path.join(
                models_path_prefix, "nm-testing/pixtral-12b-FP8-dynamic"
            ),
        },
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")
    ),
    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")
    ),
    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    ),
    "MantisForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/Mantis-8B-siglip-llama3"),
        max_transformers_version="4.48",
        transformers_version_reason="HF model is not compatible.",
        hf_overrides={"architectures": ["MantisForConditionalGeneration"]},
    ),
    "MiDashengLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mispeech/midashenglm-7b"),
        trust_remote_code=True,
    ),
    "MiniCPMO": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-o-2_6"),
        trust_remote_code=True,
    ),
    "MiniCPMV": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5"),
        extras={
            "2.6": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-2_6"),
            "4.0": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4"),
            "4.5": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4_5"),
        },
        trust_remote_code=True,
    ),
    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-VL-01"),
        trust_remote_code=True,
    ),
    "Mistral3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Small-3.1-24B-Instruct-2503"),
        extras={
            "fp8": os.path.join(
                models_path_prefix,
                "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic",
            ),
        },
    ),
    "MolmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo-7B-D-0924"),
        max_transformers_version="4.48",
        transformers_version_reason="Incorrectly-detected `tensorflow` import.",
        extras={"olmo": os.path.join(models_path_prefix, "allenai/Molmo-7B-O-0924")},
        trust_remote_code=True,
    ),
    "NVLM_D": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/NVLM-D-72B"),
        trust_remote_code=True,
    ),
    "Llama_Nemotron_Nano_VL": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1"),
        trust_remote_code=True,
    ),
    "NemotronH_Nano_VL_V2": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nano_vl_dummy"),
        is_available_online=False,
        trust_remote_code=True,
    ),
    "OpenCUAForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xlangai/OpenCUA-7B"),
        trust_remote_code=True,
    ),
    "Ovis": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2-1B"),
        trust_remote_code=True,
        max_transformers_version="4.53",
        transformers_version_reason="HF model is not compatible",
        extras={
            "1.6-llama": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Llama3.2-3B"),
            "1.6-gemma": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Gemma2-9B"),
        },
    ),
    "Ovis2_5": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2.5-2B"),
        trust_remote_code=True,
    ),
    "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "PaddlePaddle/PaddleOCR-VL"),
        trust_remote_code=True,
    ),
    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/paligemma-3b-mix-224"),
        extras={
            "v2": os.path.join(models_path_prefix, "google/paligemma2-3b-ft-docci-448"),
        },
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-vision-128k-instruct"),
        trust_remote_code=True,
        max_transformers_version="4.48",
        transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
        extras={
            "phi3.5": os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct"),
        },
    ),
    "Phi4MMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct"),
        trust_remote_code=True,
    ),
    "PixtralForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409"),
        extras={
            "mistral-large-3": os.path.join(
                models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"
            ),
            "ministral-3": os.path.join(
                models_path_prefix, "mistralai/Ministral-3-3B-Instruct-2512"
            ),
        },
        tokenizer_mode="mistral",
    ),
    "QwenVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-VL"),
        extras={"chat": os.path.join(models_path_prefix, "Qwen/Qwen-VL-Chat")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
        hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
    ),
    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-Audio-7B-Instruct")
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")
    ),
    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-3B-Instruct"),
        max_model_len=4096,
    ),
    "Qwen2_5OmniModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-3B")
    ),
    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")
    ),
    "Qwen3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-4B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3OmniMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Omni-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "RForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "YannQi/R-4B"),
        trust_remote_code=True,
    ),
    "SkyworkR1VChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B"),
        trust_remote_code=True,
    ),
    "SmolVLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    ),
    "Step3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"),
        trust_remote_code=True,
    ),
    "UltravoxModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),
        trust_remote_code=True,
    ),
    "TarsierForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier-7b")
    ),
    "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier2-Recap-7b"),
        hf_overrides={
            # An architecture name is not a filesystem path, so it must not be
            # joined with models_path_prefix.
            "architectures": ["Tarsier2ForConditionalGeneration"],
            "model_type": "tarsier2",
        },
    ),
    # NOTE(review): unlike its siblings this ID is not joined with
    # models_path_prefix -- confirm whether that is intentional.
    "VoxtralForConditionalGeneration": _HfExamplesInfo(
        "mistralai/Voxtral-Mini-3B-2507",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    # [Encoder-decoder]
    "WhisperForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/whisper-large-v3-turbo"),
        extras={"v3": os.path.join(models_path_prefix, "openai/whisper-large-v3")},
    ),
    # [Cross-encoder]
    "JinaVLForRanking": _HfExamplesInfo(
        os.path.join(models_path_prefix, "jinaai/jina-reranker-m0")
    ),
}

# Example target/draft checkpoint pairs for speculative decoding, keyed by
# architecture name. Only model and tokenizer locations are resolved under
# models_path_prefix; method names and version strings are passed verbatim.
_SPECULATIVE_DECODING_EXAMPLE_MODELS = {
    "MedusaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-68m"),
        speculative_model=os.path.join(
            models_path_prefix, "abhigoyal/vllm-medusa-llama-68m-random"
        ),
    ),
    # Temporarily disabled.
    # TODO(woosuk): Re-enable this once the MLP Speculator is supported in V1.
    # "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(
    #     "JackFram/llama-160m",
    #     speculative_model="ibm-ai-platform/llama-160m-accelerator"
    # ),
    "DeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "luccafong/deepseek_mtp_main_random"),
        speculative_model=os.path.join(
            models_path_prefix, "luccafong/deepseek_mtp_draft_random"
        ),
        trust_remote_code=True,
    ),
    "EagleDeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "eagle618/deepseek-v3-random"),
        speculative_model=os.path.join(
            models_path_prefix, "eagle618/eagle-deepseek-v3-random"
        ),
        trust_remote_code=True,
    ),
    "EagleLlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"
        ),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
    ),
    "Eagle3LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
        ),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        use_original_num_layers=True,
        max_model_len=10240,
    ),
    "EagleMistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512"),
        speculative_model=os.path.join(
            models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-Eagle"
        ),
        # TODO: revert once figuring out OOM in CI
        is_available_online=False,
    ),
    "LlamaForCausalLMEagle3": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "AngelSlim/Qwen3-8B_eagle3"),
        tokenizer=os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        use_original_num_layers=True,
    ),
    "EagleLlama4ForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"
        ),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"
        ),
        tokenizer=os.path.join(
            models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"
        ),
    ),
    "EagleMiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-1B-sft-bf16"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
        # The method is an identifier, not a path: joining it with the models
        # directory would yield "<prefix>/eagle" instead of "eagle".
        speculative_method="eagle",
        tokenizer=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
    ),
    "ErnieMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
    ),
    "Glm4MoeMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
        # Same checkpoint as the default above, so it is resolved under the
        # same prefix (matching the other MTP entries in this table).
        speculative_model=os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
    ),
    "LongCatFlashMTPModel": _HfExamplesInfo(
        # Join the prefix exactly once; a nested join doubles a relative prefix.
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"
        ),
    ),
    "MiMoMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
    ),
    "Eagle3Qwen2_5vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-7B-Instruct"),
        speculative_model=os.path.join(
            models_path_prefix, "Rayzl/qwen2.5-vl-7b-eagle3-sgl"
        ),
    ),
    "Eagle3Qwen3vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-8B-Instruct"),
        speculative_model=os.path.join(
            models_path_prefix, "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3"
        ),
    ),
    "Qwen3NextMTP": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        min_transformers_version="4.56.3",
    ),
}

# Example checkpoints for the "Transformers*" architecture names, keyed by
# architecture name. Every path is resolved under models_path_prefix.
_TRANSFORMERS_BACKEND_MODELS = {
    "TransformersEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "papluca/xlm-roberta-base-language-detection"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hmellor/Ilama-3.2-1B"),
        trust_remote_code=True,
    ),
    "TransformersMultiModalForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")
    ),
    "TransformersMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMoEEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMoEForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMultiModalEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
    "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
}

# Union of the per-category example tables. With dict unpacking, a key that
# appears in more than one table takes its value from the last table listed.
_EXAMPLE_MODELS = {
    **_TEXT_GENERATION_EXAMPLE_MODELS,
    **_EMBEDDING_EXAMPLE_MODELS,
    **_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS,
    **_MULTIMODAL_EXAMPLE_MODELS,
    **_SPECULATIVE_DECODING_EXAMPLE_MODELS,
    **_TRANSFORMERS_BACKEND_MODELS,
}


class HfExampleModels:
    """Lookup helper over a mapping from architecture name to example info."""

    def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None:
        """Store ``hf_models``; the mapping is referenced, not copied."""
        super().__init__()

        self.hf_models = hf_models

    def get_supported_archs(self) -> Set[str]:
        """Return a view of the architecture names present in the mapping."""
        return self.hf_models.keys()

    def get_hf_info(self, model_arch: str) -> _HfExamplesInfo:
        """Return the example info registered under ``model_arch``.

        Raises:
            ValueError: If ``model_arch`` has no entry in the mapping.
        """
        try:
            return self.hf_models[model_arch]
        except KeyError:
            # Suppress the KeyError context so the message stays focused.
            raise ValueError(
                f"No example model defined for {model_arch}; please update this file."
            ) from None

    def find_hf_info(self, model_id: str) -> _HfExamplesInfo:
        """Return the first entry whose default or extra model is ``model_id``.

        A match on any entry's ``default`` takes precedence over a match on
        any entry's ``extras``, so two full passes are made over the mapping.

        Raises:
            ValueError: If no entry references ``model_id``.
        """
        for info in self.hf_models.values():
            if info.default == model_id:
                return info

        # Fallback to extras
        for info in self.hf_models.values():
            if any(extra == model_id for extra in info.extras.values()):
                return info

        raise ValueError(
            f"No example model defined for {model_id}; please update this file."
        )


# Lookup over every example table merged into _EXAMPLE_MODELS.
HF_EXAMPLE_MODELS = HfExampleModels(_EXAMPLE_MODELS)
# Lookup over the architectures listed in _AUTOMATIC_CONVERTED_MODELS.
AUTO_EXAMPLE_MODELS = HfExampleModels(_AUTOMATIC_CONVERTED_MODELS)