registry.py 49.3 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from collections.abc import Mapping, Set
5
from dataclasses import dataclass, field
6
from typing import Any, Literal
7

zhuwenwen's avatar
zhuwenwen committed
8
import os
9
10
11
import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
zhuwenwen's avatar
zhuwenwen committed
12
# from ..utils import models_path_prefix
13

zhuwenwen's avatar
zhuwenwen committed
14
# Root directory that holds local copies of the test checkpoints.
# `VLLM_OPTEST_MODELS_PATH` takes precedence over `OPTEST_MODELS_PATH`.
# Falls back to "" when neither is set: `os.path.join("", name)` returns
# `name` unchanged, whereas joining onto `None` raises `TypeError` at import.
models_path_prefix = (
    os.getenv("VLLM_OPTEST_MODELS_PATH") or os.getenv("OPTEST_MODELS_PATH") or ""
)
15

16
from vllm.config.model import ModelDType, TokenizerMode
17

zhuwenwen's avatar
zhuwenwen committed
18

19
20
21
22
23
24
25
26
@dataclass(frozen=True)
class _HfExamplesInfo:
    """
    Describes the example checkpoint(s) and loading options used to test one
    model architecture, plus helpers that skip or fail a test when the
    environment cannot run it.
    """

    default: str
    """The default model to use for testing this architecture."""

    extras: Mapping[str, str] = field(default_factory=dict)
    """Extra models to use for testing this architecture."""

    tokenizer: str | None = None
    """Set the tokenizer to load for this architecture."""

    tokenizer_mode: TokenizerMode | str = "auto"
    """Set the tokenizer type for this architecture."""

    speculative_model: str | None = None
    """
    The default model to use for testing this architecture, which is only used
    for speculative decoding.
    """

    speculative_method: str | None = None
    """
    The method to use for speculative decoding.
    """

    min_transformers_version: str | None = None
    """
    The minimum version of HF Transformers that is required to run this model.
    """

    max_transformers_version: str | None = None
    """
    The maximum version of HF Transformers that this model runs on.
    """

    transformers_version_reason: str | None = None
    """
    The reason for the minimum/maximum version requirement.
    """

    require_embed_inputs: bool = False
    """
    If `True`, enables prompt and multi-modal embedding inputs while
    disabling tokenization.
    """

    dtype: ModelDType = "auto"
    """
    The data type for the model weights and activations.
    """

    enforce_eager: bool = False
    """
    Whether to enforce eager execution. If True, we will
    disable CUDA graph and always execute the model in eager mode.
    If False, we will use CUDA graph and eager execution in hybrid.
    """

    is_available_online: bool = True
    """
    Set this to `False` if the name of this architecture no longer exists on
    the HF repo. To maintain backwards compatibility, we have not removed them
    from the main model registry, so without this flag the registry tests will
    fail.
    """

    trust_remote_code: bool = False
    """The `trust_remote_code` level required to load the model."""

    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The `hf_overrides` required to load the model."""

    max_model_len: int | None = None
    """
    The maximum model length to use for this model. Some models default to a
    length that is too large to fit into memory in CI.
    """

    revision: str | None = None
    """
    The specific revision (commit hash, tag, or branch) to use for the model.
    If not specified, the default revision will be used.
    """

    max_num_seqs: int | None = None
    """Maximum number of sequences to be processed in a single iteration."""

    use_original_num_layers: bool = False
    """
    If True, use the original number of layers from the model config
    instead of minimal layers for testing.
    """

    def check_transformers_version(
        self,
        *,
        on_fail: Literal["error", "skip", "return"],
        check_min_version: bool = True,
        check_max_version: bool = True,
    ) -> str | None:
        """
        If the installed transformers version does not meet the requirements,
        perform the given action.

        Args:
            on_fail: `"error"` raises `RuntimeError`, `"skip"` calls
                `pytest.skip`, and `"return"` hands the message back.
            check_min_version: Whether to enforce `min_transformers_version`.
            check_max_version: Whether to enforce `max_transformers_version`.

        Returns:
            `None` when the installed version is acceptable (or no bound is
            configured); otherwise the message describing the violation.

        Raises:
            RuntimeError: If the requirement is violated and
                `on_fail == "error"`.
        """
        min_version = self.min_transformers_version
        max_version = self.max_transformers_version
        if min_version is None and max_version is None:
            return None

        current_version = TRANSFORMERS_VERSION
        # Only check the base version for the min/max version, otherwise preview
        # models cannot be run because `x.yy.0.dev0`<`x.yy.0`
        cur_base_version = Version(current_version).base_version
        installed = Version(cur_base_version)

        msg = f"`transformers=={current_version}` installed, but `transformers"
        if check_min_version and min_version and installed < Version(min_version):
            msg += f">={min_version}` is required to run this model."
        elif check_max_version and max_version and installed > Version(max_version):
            msg += f"<={max_version}` is required to run this model."
        else:
            return None

        if self.transformers_version_reason:
            msg += f" Reason: {self.transformers_version_reason}"

        if on_fail == "error":
            raise RuntimeError(msg)
        elif on_fail == "skip":
            pytest.skip(msg)

        return msg

    def check_available_online(
        self,
        *,
        on_fail: Literal["error", "skip"],
    ) -> None:
        """
        If the model is not available online, perform the given action.

        Args:
            on_fail: `"error"` raises `RuntimeError`; any other value calls
                `pytest.skip`.

        Raises:
            RuntimeError: If the model is flagged as unavailable and
                `on_fail == "error"`.
        """
        if not self.is_available_online:
            msg = "Model is not available online"

            if on_fail == "error":
                raise RuntimeError(msg)
            else:
                pytest.skip(msg)

177
178
179

# Maps each text-generation architecture name to the checkpoint(s) used to
# test it. Every model path is rooted at `models_path_prefix`.
_TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AfmoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "arcee-ai/Trinity-Nano"),
        is_available_online=False,
    ),
    "ApertusForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "swiss-ai/Apertus-8B-Instruct-2509")
    ),
    "AquilaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/AquilaChat-7B"),
        trust_remote_code=True,
    ),
    "AquilaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/AquilaChat2-7B"),
        trust_remote_code=True,
    ),
    "ArceeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "arcee-ai/AFM-4.5B-Base")
    ),
    "ArcticForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-instruct"),
        trust_remote_code=True,
    ),
    "BaiChuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B"),
        trust_remote_code=True,
    ),
    "BaichuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan2-7B-chat"),
        trust_remote_code=True,
    ),
    "BailingMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-lite-1.5"),
        trust_remote_code=True,
    ),
    "BailingMoeV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-mini-2.0"),
        trust_remote_code=True,
    ),
    "BambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-ai-platform/Bamba-9B-v1"),
        extras={
            "tiny": os.path.join(
                models_path_prefix, "hmellor/tiny-random-BambaForCausalLM"
            )
        },
    ),
    "BloomForCausalLM": _HfExamplesInfo(
        # The default is prefixed like every other entry (it was left as a bare
        # repo id while its extras were already prefixed).
        os.path.join(models_path_prefix, "bigscience/bloom-560m"),
        {"1b": os.path.join(models_path_prefix, "bigscience/bloomz-1b1")},
    ),
    "ChatGLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/chatglm3-6b"),
        trust_remote_code=True,
        max_transformers_version="4.48",
    ),
    "ChatGLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "thu-coai/ShieldLM-6B-chatglm3"),
        trust_remote_code=True,
    ),
    "CohereForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereForAI/c4ai-command-r-v01"),
        trust_remote_code=True,
    ),
    "Cohere2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereForAI/c4ai-command-r7b-12-2024"),
        trust_remote_code=True,
    ),
    "CwmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/cwm"),
        min_transformers_version="4.58",
    ),
    "DbrxForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "databricks/dbrx-instruct")
    ),
    "DeciLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3_3-Nemotron-Super-49B-v1"),
        trust_remote_code=True,
    ),
    "DeepseekForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-moe-16b-base"),
        trust_remote_code=True,
    ),
    "DeepseekV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V2-Lite-Chat"),
        trust_remote_code=True,
    ),
    "DeepseekV3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3"),
        trust_remote_code=True,
    ),
    "DeepseekV32ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3.2-Exp")
    ),
    "Ernie4_5ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-0.3B-PT")
    ),
    "Ernie4_5_MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT")
    ),
    "ExaoneForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"),
        trust_remote_code=True,
    ),
    "Exaone4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-4.0-32B")
    ),
    "Fairseq2LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mgleize/fairseq2-dummy-Llama-3.2-1B")
    ),
    "FalconForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-7b")
    ),
    "FalconH1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/Falcon-H1-0.5B-Base")
    ),
    "FlexOlmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Flex-reddit-2x7B-1T")
    ),
    "GemmaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
    ),
    "Gemma2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-2-9b"),
        extras={"tiny": os.path.join(models_path_prefix, "google/gemma-2-2b-it")},
    ),
    "Gemma3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-1b-it")
    ),
    "Gemma3nForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")
    ),
    "GlmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4-9b-chat-hf")
    ),
    "Glm4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4-9B-0414")
    ),
    "Glm4MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5")
    ),
    "GPT2LMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai-community/gpt2"),
        {"alias": os.path.join(models_path_prefix, "gpt2")},
    ),
    "GPTBigCodeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder"),
        extras={
            "tiny": os.path.join(models_path_prefix, "bigcode/tiny_starcoder_py"),
            "santacoder": os.path.join(
                models_path_prefix, "bigcode/gpt_bigcode-santacoder"
            ),
        },
    ),
    "GPTJForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Milos/slovak-gpt-j-405M"),
        {"6b": os.path.join(models_path_prefix, "EleutherAI/gpt-j-6b")},
    ),
    "GPTNeoXForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "EleutherAI/pythia-70m"),
        {"1b": os.path.join(models_path_prefix, "EleutherAI/pythia-1.4b")},
    ),
    "GptOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lmsys/gpt-oss-20b-bf16")
    ),
    "GraniteForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm/PowerLM-3b")
    ),
    "GraniteMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm/PowerMoE-3b")
    ),
    "GraniteMoeHybridForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-4.0-tiny-preview")
    ),
    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "ibm-research/moe-7b-1b-active-shared-experts"
        )
    ),
    "Grok1ModelForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hpcai-tech/grok-1"),
        trust_remote_code=True,
    ),
    "HunYuanDenseV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-7B-Instruct")
    ),
    "HunYuanMoEV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-A13B-Instruct"),
        trust_remote_code=True,
    ),
    "InternLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm-chat-7b"),
        trust_remote_code=True,
    ),
    "InternLM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-chat-7b"),
        trust_remote_code=True,
    ),
    "InternLM2VEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/Mono-InternVL-2B"),
        trust_remote_code=True,
    ),
    "InternLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm3-8b-instruct"),
        trust_remote_code=True,
    ),
    "JAISLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inceptionai/jais-13b-chat")
    ),
    "JambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/AI21-Jamba-1.5-Mini"),
        extras={
            "tiny": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-dev"),
            "random": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-random"),
        },
    ),
    "KimiLinearForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-Linear-48B-A3B-Instruct"),
        trust_remote_code=True,
    ),
    "Lfm2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-1.2B")
    ),
    "Lfm2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-8B-A1B"),
        min_transformers_version="4.58",
    ),
    "LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.2-1B-Instruct"),
        extras={
            "guard": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-3-1B"),
            "hermes": os.path.join(
                models_path_prefix, "NousResearch/Hermes-3-Llama-3.1-8B"
            ),
            "fp8": os.path.join(
                models_path_prefix, "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"
            ),
            "tiny": os.path.join(
                models_path_prefix, "hmellor/tiny-random-LlamaForCausalLM"
            ),
        },
    ),
    "LLaMAForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "decapoda-research/llama-7b-hf"),
        is_available_online=False,
    ),
    "Llama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "LongcatFlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
    ),
    "MambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "state-spaces/mamba-130m-hf")
    ),
    "Mamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mamba-Codestral-7B-v0.1"),
        extras={
            "random": os.path.join(
                models_path_prefix, "yujiepan/mamba2-codestral-v0.1-tiny-random"
            ),
        },
    ),
    "FalconMambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-mamba-7b-instruct")
    ),
    "MiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
        trust_remote_code=True,
    ),
    "MiniCPM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM3-4B"),
        trust_remote_code=True,
    ),
    "MiniMaxForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01-hf")
    ),
    "MiniMaxText01ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01"),
        trust_remote_code=True,
        revision="a59aa9cbc53b9fb8742ca4e9e1531b9802b6fdc3",
    ),
    "MiniMaxM1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M1-40k"),
        trust_remote_code=True,
    ),
    "MiniMaxM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M2"),
        trust_remote_code=True,
    ),
    "MistralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-7B-Instruct-v0.1")
    ),
    "MistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"
        ),
        is_available_online=False,
    ),
    "MixtralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1"),
        {"tiny": os.path.join(models_path_prefix, "TitanML/tiny-mixtral")},
    ),
    "MptForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mpt"), is_available_online=False
    ),
    "MPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mosaicml/mpt-7b")
    ),
    "NemotronForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Minitron-8B-Base")
    ),
    "NemotronHForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Nemotron-H-8B-Base-8K"),
        trust_remote_code=True,
    ),
    "OlmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMo-1B-hf")
    ),
    "Olmo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMo-2-0425-1B")
    ),
    "Olmo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Olmo-3-7B-Instruct")
    ),
    "OlmoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924-Instruct")
    ),
    "OpenPanguMTPModel": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"
        ),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "OPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/opt-125m"),
        {"1b": os.path.join(models_path_prefix, "facebook/opt-iml-max-1.3b")},
    ),
    "OrionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OrionStarAI/Orion-14B-Chat"),
        trust_remote_code=True,
    ),
    "OuroForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance/Ouro-1.4B"),
        trust_remote_code=True,
    ),
    "PanguEmbeddedForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Embedded-7B-V1.1"
        ),
        trust_remote_code=True,
    ),
    "PanguUltraMoEForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"
        ),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PersimmonForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "adept/persimmon-8b-chat")
    ),
    "PhiForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/phi-2")
    ),
    "Phi3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-mini-4k-instruct")
    ),
    "PhiMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3.5-MoE-instruct"),
        trust_remote_code=True,
    ),
    "Plamo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-2-1b"),
        trust_remote_code=True,
    ),
    "Plamo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-3-nict-2b-base"),
        trust_remote_code=True,
    ),
    "QWenLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat"),
        max_transformers_version="4.53",
        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
        trust_remote_code=True,
    ),
    "Qwen2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-0.5B-Instruct"),
        extras={"2.5": os.path.join(models_path_prefix, "Qwen/Qwen2.5-0.5B-Instruct")},
    ),
    "Qwen2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen1.5-MoE-A2.7B-Chat")
    ),
    "Qwen3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B")
    ),
    "Qwen3MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B")
    ),
    "Qwen3NextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        extras={
            "tiny-random": os.path.join(
                models_path_prefix, "tiny-random/qwen3-next-moe"
            )
        },
        min_transformers_version="4.56.3",
    ),
    "RWForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-40b")
    ),
    "SeedOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance-Seed/Seed-OSS-36B-Instruct"),
        trust_remote_code=True,
    ),
    "SmolLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolLM3-3B")
    ),
    "StableLMEpochForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stabilityai/stablelm-zephyr-3b")
    ),
    "StableLmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stabilityai/stablelm-3b-4e1t")
    ),
    "Starcoder2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder2-3b")
    ),
    "Step3TextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"),
        trust_remote_code=True,
    ),
    "SolarForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "upstage/solar-pro-preview-instruct"),
        trust_remote_code=True,
    ),
    "TeleChatForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "chuhac/TeleChat2-35B"),
        trust_remote_code=True,
    ),
    "TeleChat2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Tele-AI/TeleChat2-3B"),
        trust_remote_code=True,
    ),
    "TeleFLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CofeAI/FLM-2-52B-Instruct-2407"),
        trust_remote_code=True,
    ),
    "XverseForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xverse/XVERSE-7B-Chat"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-2-7b"),
        trust_remote_code=True,
    ),
    "Zamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Zyphra/Zamba2-7B-instruct")
    ),
    "MiMoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
    ),
    "Dots1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.llm1.inst")
    ),
}

# Maps each embedding / pooling architecture name to the checkpoint(s) used to
# test it. Every model path is rooted at `models_path_prefix`.
_EMBEDDING_EXAMPLE_MODELS = {
    # [Text-only]
    "BertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5")
    ),
    "Gemma2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-multilingual-gemma2")
    ),
    "Gemma3TextModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/embeddinggemma-300m")
    ),
    "GritLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "parasail-ai/GritLM-7B-vllm")
    ),
    "GteModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-v2.0"),
        trust_remote_code=True,
    ),
    "GteNewModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-base-en-v1.5"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewModel"]},
    ),
    "InternLM2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-1_8b-reward"),
        trust_remote_code=True,
    ),
    "JambaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-reward-dev")
    ),
    # `is_available_online` belongs to `_HfExamplesInfo`, not `os.path.join`.
    "LlamaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llama"), is_available_online=False
    ),
    "MistralModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")
    ),
    "ModernBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-modernbert-base"),
        trust_remote_code=True,
    ),
    "NomicBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nomic-ai/nomic-embed-text-v2-moe"),
        trust_remote_code=True,
    ),
    "Qwen2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ssmits/Qwen2-7B-Instruct-embed-base")
    ),
    "Qwen2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-RM-72B"),
        max_transformers_version="4.53",
        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
    ),
    "Qwen2ForProcessRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-PRM-7B"),
        max_transformers_version="4.53",
        transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers",  # noqa: E501
    ),
    "RobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/stsb-roberta-base-v2")
    ),
    "RobertaForMaskedLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/all-roberta-large-v1")
    ),
    "XLMRobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/multilingual-e5-small")
    ),
    "BertSpladeSparseEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver/splade-v3"),
        hf_overrides={"architectures": ["BertSpladeSparseEmbeddingModel"]},
    ),
    # [Multimodal]
    "CLIPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/clip-vit-base-patch32")
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "royokong/e5-v")
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full"),
        trust_remote_code=True,
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MrLight/dse-qwen2-2b-mrl-v1")
    ),
    "SiglipModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/siglip-base-patch16-224")
    ),
    "PrithviGeoSpatialMAE": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
    "Terratorch": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
}
533

534
535
# Maps each sequence/token-classification architecture name to the checkpoint
# used to test it. Every model path is rooted at `models_path_prefix`.
_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "GPT2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nie3e/sentiment-polish-gpt2-small")
    ),
    # [Cross-encoder]
    "BertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/ms-marco-MiniLM-L-6-v2")
    ),
    "BertForTokenClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "boltuix/NeuroBERT-NER")
    ),
    "GteNewForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-multilingual-reranker-base"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
    ),
    "ModernBertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-reranker-modernbert-base")
    ),
    "ModernBertForTokenClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "disham993/electrical-ner-ModernBERT-base")
    ),
    "RobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/quora-roberta-base")
    ),
    "XLMRobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-m3")
    ),
}

561
562
# Maps each automatically converted sequence-classification architecture name
# to the checkpoint used to test it. Every model path is rooted at
# `models_path_prefix`.
_AUTOMATIC_CONVERTED_MODELS = {
    # Use as_seq_cls_model for automatic conversion
    "GemmaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-gemma"),
        hf_overrides={
            "architectures": ["GemmaForSequenceClassification"],
            "classifier_from_token": ["Yes"],
            "method": "no_post_processing",
        },
    ),
    "LlamaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-Reward-V2-Llama-3.2-1B")
    ),
    "Qwen2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "jason9693/Qwen2.5-1.5B-apeach")
    ),
    "Qwen3ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tomaarsen/Qwen3-Reranker-0.6B-seq-cls")
    ),
}

580
581
# Example checkpoints for multimodal architectures, keyed by architecture
# name. Every checkpoint (default and extras) is addressed relative to
# `models_path_prefix` so that the whole table resolves against one root.
_MULTIMODAL_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AriaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rhymes-ai/Aria")
    ),
    "AyaVisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereForAI/aya-vision-8b")
    ),
    "BeeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Open-Bee/Bee-8B-RL"),
        trust_remote_code=True,
    ),
    "Blip2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Salesforce/blip2-opt-2.7b"),
        extras={"6b": os.path.join(models_path_prefix, "Salesforce/blip2-opt-6.7b")},
    ),
    "ChameleonForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/chameleon-7b")
    ),
    "Cohere2VisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/command-a-vision-07-2025")
    ),
    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-vl2-tiny"),
        extras={"fork": os.path.join(models_path_prefix, "Isotr0py/deepseek-vl2-tiny")},
        max_transformers_version="4.48",
        transformers_version_reason="HF model is not compatible.",
        hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
    ),
    "DeepseekOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-OCR"),
    ),
    "DotsOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.ocr"),
        trust_remote_code=True,
    ),
    "Emu3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")
    ),
    "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-VL-28B-A3B-PT"),
        trust_remote_code=True,
    ),
    "FuyuForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "adept/fuyu-8b")
    ),
    "Gemma3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
    "Gemma3nForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")
    ),
    # Rooted at `models_path_prefix` like every other entry in this table.
    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-speech-3.3-2b")
    ),
    "GLM4VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4v-9b"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GLM4VForCausalLM"]},
    ),
    "Glm4vForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.1V-9B-Thinking")
    ),
    "Glm4vMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5V")
    ),
    "H2OVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-800m"),
        trust_remote_code=True,
        extras={"2b": os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-2b")},
        max_transformers_version="4.48",
        transformers_version_reason="HF model is not compatible.",
    ),
    "HCXVisionForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
        ),
        trust_remote_code=True,
    ),
    "HunYuanVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/HunyuanOCR"),
        is_available_online=False,
    ),
    "Idefics3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3"),
        extras={
            "tiny": os.path.join(
                models_path_prefix, "HuggingFaceTB/SmolVLM-256M-Instruct"
            )
        },
    ),
    "InternS1ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/Intern-S1"),
        trust_remote_code=True,
    ),
    "InternVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
        extras={
            "2B": os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
            "3.0": os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B"),
            "3.5-qwen3": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-1B"),
            "3.5-qwen3moe": os.path.join(
                models_path_prefix, "OpenGVLab/InternVL3_5-30B-A3B"
            ),
            "3.5-gptoss": os.path.join(
                models_path_prefix, "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"
            ),
        },
        trust_remote_code=True,
    ),
    "InternVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B-hf")
    ),
    "KeyeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-8B-Preview"),
        trust_remote_code=True,
    ),
    "KeyeVL1_5ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-1_5-8B"),
        trust_remote_code=True,
    ),
    "KimiVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Instruct"),
        extras={
            "thinking": os.path.join(
                models_path_prefix, "moonshotai/Kimi-VL-A3B-Thinking"
            )
        },
        trust_remote_code=True,
    ),
    "LightOnOCRForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lightonai/LightOnOCR-1B"),
        is_available_online=False,
    ),
    "Llama4ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
        max_model_len=10240,
        extras={
            "llama-guard-4": os.path.join(
                models_path_prefix, "meta-llama/Llama-Guard-4-12B"
            )
        },
    ),
    "LlavaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
        extras={
            "mistral": os.path.join(
                models_path_prefix, "mistral-community/pixtral-12b"
            ),
            "mistral-fp8": os.path.join(
                models_path_prefix, "nm-testing/pixtral-12b-FP8-dynamic"
            ),
        },
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")
    ),
    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")
    ),
    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    ),
    "MantisForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/Mantis-8B-siglip-llama3"),
        max_transformers_version="4.48",
        transformers_version_reason="HF model is not compatible.",
        hf_overrides={"architectures": ["MantisForConditionalGeneration"]},
    ),
    "MiDashengLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mispeech/midashenglm-7b"),
        trust_remote_code=True,
    ),
    "MiniCPMO": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-o-2_6"),
        trust_remote_code=True,
    ),
    "MiniCPMV": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5"),
        extras={
            "2.6": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-2_6"),
            "4.0": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4"),
            "4.5": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4_5"),
        },
        trust_remote_code=True,
    ),
    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-VL-01"),
        trust_remote_code=True,
    ),
    "Mistral3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
        ),
        extras={
            "fp8": os.path.join(
                models_path_prefix,
                "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic",
            )
        },
    ),
    "MolmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo-7B-D-0924"),
        max_transformers_version="4.48",
        transformers_version_reason="Incorrectly-detected `tensorflow` import.",
        extras={"olmo": os.path.join(models_path_prefix, "allenai/Molmo-7B-O-0924")},
        trust_remote_code=True,
    ),
    "NVLM_D": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/NVLM-D-72B"),
        trust_remote_code=True,
    ),
    "Llama_Nemotron_Nano_VL": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1"),
        trust_remote_code=True,
    ),
    "NemotronH_Nano_VL_V2": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nano_vl_dummy"),
        is_available_online=False,
        trust_remote_code=True,
    ),
    "OpenCUAForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xlangai/OpenCUA-7B"),
        trust_remote_code=True,
    ),
    "Ovis": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2-1B"),
        trust_remote_code=True,
        max_transformers_version="4.53",
        transformers_version_reason="HF model is not compatible",
        extras={
            "1.6-llama": os.path.join(
                models_path_prefix, "AIDC-AI/Ovis1.6-Llama3.2-3B"
            ),
            "1.6-gemma": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Gemma2-9B"),
        },
    ),
    "Ovis2_5": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2.5-2B"),
        trust_remote_code=True,
    ),
    "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "PaddlePaddle/PaddleOCR-VL"),
        trust_remote_code=True,
    ),
    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/paligemma-3b-mix-224"),
        extras={
            "v2": os.path.join(models_path_prefix, "google/paligemma2-3b-ft-docci-448")
        },
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-vision-128k-instruct"),
        trust_remote_code=True,
        max_transformers_version="4.48",
        transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
        extras={
            "phi3.5": os.path.join(
                models_path_prefix, "microsoft/Phi-3.5-vision-instruct"
            )
        },
    ),
    "Phi4MMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct"),
        trust_remote_code=True,
    ),
    "Phi4MultimodalForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct"),
        revision="refs/pr/70",
    ),
    "PixtralForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409"),
        extras={
            "mistral-large-3": os.path.join(
                models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"
            ),
            "ministral-3": os.path.join(
                models_path_prefix, "mistralai/Ministral-3-3B-Instruct-2512"
            ),
        },
        tokenizer_mode="mistral",
        # TODO: revert once Mistral-Large-3 and Ministral-3 are publicly available.
        is_available_online=False,
    ),
    "QwenVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-VL"),
        extras={"chat": os.path.join(models_path_prefix, "Qwen/Qwen-VL-Chat")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
        hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
    ),
    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-Audio-7B-Instruct")
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")
    ),
    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-3B-Instruct"),
        max_model_len=4096,
    ),
    "Qwen2_5OmniModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-3B")
    ),
    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")
    ),
    "Qwen3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-4B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3OmniMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Omni-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "RForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "YannQi/R-4B"),
        trust_remote_code=True,
    ),
    "SkyworkR1VChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B"),
        trust_remote_code=True,
    ),
    "SmolVLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    ),
    "Step3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"),
        trust_remote_code=True,
    ),
    "UltravoxModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),
        trust_remote_code=True,
    ),
    "TarsierForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier-7b")
    ),
    "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier2-Recap-7b"),
        hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]},
    ),
    "VoxtralForConditionalGeneration": _HfExamplesInfo(
        # Rooted at `models_path_prefix` like every other entry in this table.
        os.path.join(models_path_prefix, "mistralai/Voxtral-Mini-3B-2507"),
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    # [Encoder-decoder]
    "WhisperForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/whisper-large-v3")
    ),
    # [Cross-encoder]
    "JinaVLForRanking": _HfExamplesInfo(
        os.path.join(models_path_prefix, "jinaai/jina-reranker-m0")
    ),
}

850

851
# Example target/draft checkpoint pairs for speculative decoding, keyed by
# architecture name. Every checkpoint path (target, draft and tokenizer) is
# addressed relative to `models_path_prefix`.
_SPECULATIVE_DECODING_EXAMPLE_MODELS = {
    "MedusaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-68m"),
        speculative_model=os.path.join(
            models_path_prefix, "abhigoyal/vllm-medusa-llama-68m-random"
        ),
    ),
    # Temporarily disabled.
    # TODO(woosuk): Re-enable this once the MLP Speculator is supported in V1.
    # "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(
    #     "JackFram/llama-160m",
    #     speculative_model="ibm-ai-platform/llama-160m-accelerator"
    # ),
    "DeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "luccafong/deepseek_mtp_main_random"),
        speculative_model=os.path.join(
            models_path_prefix, "luccafong/deepseek_mtp_draft_random"
        ),
        trust_remote_code=True,
    ),
    "EagleDeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "eagle618/deepseek-v3-random"),
        speculative_model=os.path.join(
            models_path_prefix, "eagle618/eagle-deepseek-v3-random"
        ),
        trust_remote_code=True,
    ),
    "EagleLlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"
        ),
        tokenizer=os.path.join(
            models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"
        ),
    ),
    "Eagle3LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
        ),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        use_original_num_layers=True,
        max_model_len=10240,
    ),
    "EagleMistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512"
        ),
        # The draft model is a real path join; only the repo id is a string.
        speculative_model=os.path.join(
            models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-Eagle"
        ),
        is_available_online=False,
    ),
    "LlamaForCausalLMEagle3": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "AngelSlim/Qwen3-8B_eagle3"),
        tokenizer=os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        use_original_num_layers=True,
    ),
    "EagleLlama4ForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"
        ),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"
        ),
        tokenizer=os.path.join(
            models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"
        ),
    ),
    "EagleMiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-1B-sft-bf16"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"
        ),
        # `speculative_method` names a decoding method; it is not a filesystem
        # path and must not be rooted at `models_path_prefix`.
        speculative_method="eagle",
        tokenizer=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
    ),
    "ErnieMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"
        ),
    ),
    "Glm4MoeMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
        # Same checkpoint as the target model, so it gets the same root.
        speculative_model=os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
    ),
    "LongCatFlashMTPModel": _HfExamplesInfo(
        # Joined with the prefix exactly once; a nested join would repeat a
        # relative prefix.
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"
        ),
    ),
    "MiMoMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
    ),
    "Eagle3Qwen2_5vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-7B-Instruct"),
        speculative_model=os.path.join(
            models_path_prefix, "Rayzl/qwen2.5-vl-7b-eagle3-sgl"
        ),
    ),
    "Eagle3Qwen3vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-8B-Instruct"),
        speculative_model=os.path.join(
            models_path_prefix, "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3"
        ),
    ),
    "Qwen3NextMTP": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        min_transformers_version="4.56.3",
    ),
}

942
# Example checkpoints for the `Transformers*` architecture names, keyed by
# architecture name. Every checkpoint is addressed relative to
# `models_path_prefix`.
_TRANSFORMERS_BACKEND_MODELS = {
    "TransformersEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "papluca/xlm-roberta-base-language-detection"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hmellor/Ilama-3.2-1B"),
        trust_remote_code=True,
    ),
    "TransformersMultiModalForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")
    ),
    "TransformersMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMoEEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMoEForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMultiModalEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
    "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
}

972
973
974
# Union of all per-category example tables. Later tables win if two tables
# ever define the same architecture name, because dict unpacking keeps the
# last value seen for a key.
_EXAMPLE_MODELS = {
    **_TEXT_GENERATION_EXAMPLE_MODELS,
    **_EMBEDDING_EXAMPLE_MODELS,
    **_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS,
    **_MULTIMODAL_EXAMPLE_MODELS,
    **_SPECULATIVE_DECODING_EXAMPLE_MODELS,
    **_TRANSFORMERS_BACKEND_MODELS,
}


class HfExampleModels:
    """Read-only lookup over a table of example models.

    The table maps an architecture name to the `_HfExamplesInfo` describing
    the example checkpoint(s) for that architecture.
    """

    def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None:
        super().__init__()

        self.hf_models = hf_models

    def get_supported_archs(self) -> Set[str]:
        """Return the architecture names that have an example model."""
        return self.hf_models.keys()

    def get_hf_info(self, model_arch: str) -> _HfExamplesInfo:
        """Return the example info registered for `model_arch`.

        Raises:
            ValueError: If no example model is registered for `model_arch`.
        """
        try:
            return self.hf_models[model_arch]
        except KeyError:
            # `from None` hides the internal KeyError from the traceback.
            raise ValueError(
                f"No example model defined for {model_arch}; please update this file."
            ) from None

    def find_hf_info(self, model_id: str) -> _HfExamplesInfo:
        """Return the example info whose default or extra model is `model_id`.

        Default models are searched across the whole table first; extras are
        only consulted when no default matches, so a default always takes
        precedence over an extra with the same identifier.

        Raises:
            ValueError: If `model_id` matches neither a default nor an extra.
        """
        for info in self.hf_models.values():
            if info.default == model_id:
                return info

        # Fallback to extras
        for info in self.hf_models.values():
            if any(extra == model_id for extra in info.extras.values()):
                return info

        raise ValueError(
            f"No example model defined for {model_id}; please update this file."
        )
1012

1013

Patrick von Platen's avatar
Patrick von Platen committed
1014
# Lookup over the merged table of all example models.
HF_EXAMPLE_MODELS = HfExampleModels(_EXAMPLE_MODELS)
# Lookup over the automatically converted models only.
AUTO_EXAMPLE_MODELS = HfExampleModels(_AUTOMATIC_CONVERTED_MODELS)