registry.py 57.3 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from collections.abc import Mapping, Set
5
from dataclasses import dataclass, field
6
from typing import Any, Literal
7

zhuwenwen's avatar
zhuwenwen committed
8
import os
9
10
11
import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
zhuwenwen's avatar
zhuwenwen committed
12
# from ..utils import models_path_prefix
13

zhuwenwen's avatar
zhuwenwen committed
14
# Directory prefix that the registry entries below join onto each model id
# via `os.path.join(models_path_prefix, "<org>/<model>")`.
# `VLLM_OPTEST_MODELS_PATH` takes precedence over `OPTEST_MODELS_PATH`.
# When neither variable is set, fall back to an empty string instead of
# `None`: `os.path.join(None, ...)` raises `TypeError` at import time, whereas
# `os.path.join("", name)` simply returns `name` unchanged.
models_path_prefix = (
    os.getenv("VLLM_OPTEST_MODELS_PATH") or os.getenv("OPTEST_MODELS_PATH") or ""
)
15

16
from vllm.config.model import ModelDType, TokenizerMode
17

zhuwenwen's avatar
zhuwenwen committed
18

19
20
21
22
23
24
25
26
@dataclass(frozen=True)
class _HfExamplesInfo:
    """
    Example checkpoint(s) for one model architecture, together with the
    options recorded for loading it and the HF Transformers version
    constraints that `check_transformers_version` enforces.
    """

    default: str
    """The default model to use for testing this architecture."""

    extras: Mapping[str, str] = field(default_factory=dict)
    """Extra models to use for testing this architecture."""

    tokenizer: str | None = None
    """Set the tokenizer to load for this architecture."""

    tokenizer_mode: TokenizerMode | str = "auto"
    """Set the tokenizer type for this architecture."""

    speculative_model: str | None = None
    """
    The default model to use for testing this architecture, which is only used
    for speculative decoding.
    """

    speculative_method: str | None = None
    """
    The method to use for speculative decoding.
    """

    min_transformers_version: str | None = None
    """
    The minimum version of HF Transformers that is required to run this model.
    """

    max_transformers_version: str | None = None
    """
    The maximum version of HF Transformers that this model runs on.
    """

    transformers_version_reason: dict[Literal["vllm", "hf"], str] | None = None
    """
    The type and reason to skip test for the minimum/maximum version requirement.
    vllm: skip all vLLM tests if the version requirement is not met.
    hf: only skip tests that use HF runner if the version requirement is not met.
    """

    require_embed_inputs: bool = False
    """
    If `True`, enables prompt and multi-modal embedding inputs while
    disabling tokenization.
    """

    dtype: ModelDType = "auto"
    """
    The data type for the model weights and activations.
    """

    enforce_eager: bool = False
    """
    Whether to enforce eager execution. If True, we will
    disable CUDA graph and always execute the model in eager mode.
    If False, we will use CUDA graph and eager execution in hybrid.
    """

    is_available_online: bool = True
    """
    Set this to `False` if the name of this architecture no longer exists on
    the HF repo. To maintain backwards compatibility, we have not removed them
    from the main model registry, so without this flag the registry tests will
    fail.
    """

    trust_remote_code: bool = False
    """The `trust_remote_code` level required to load the model."""

    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The `hf_overrides` required to load the model."""

    max_model_len: int | None = None
    """
    The maximum model length to use for this model. Some models default to a
    length that is too large to fit into memory in CI.
    """

    max_num_batched_tokens: int | None = None
    """
    The maximum number of tokens to be processed in a single batch.
    """

    revision: str | None = None
    """
    The specific revision (commit hash, tag, or branch) to use for the model.
    If not specified, the default revision will be used.
    """

    max_num_seqs: int | None = None
    """Maximum number of sequences to be processed in a single iteration."""

    use_original_num_layers: bool = False
    """
    If True, use the original number of layers from the model config
    instead of minimal layers for testing.
    """

    def check_transformers_version(
        self,
        *,
        on_fail: Literal["error", "skip", "return"],
        check_version_reason: Literal["vllm", "hf"] = "hf",
        check_min_version: bool = True,
        check_max_version: bool = True,
    ) -> str | None:
        """
        If the installed transformers version does not meet the requirements,
        perform the given action.

        Args:
            on_fail: `"error"` raises `RuntimeError`, `"skip"` calls
                `pytest.skip`, `"return"` returns the failure message.
            check_version_reason: The key looked up in
                `transformers_version_reason`. When the violated bound's
                check is disabled, a matching key still triggers the action.
            check_min_version: Whether a violated minimum version triggers
                the action.
            check_max_version: Whether a violated maximum version triggers
                the action.

        Returns:
            `None` when no action is needed, otherwise the failure message
            (only reachable with `on_fail="return"`).

        Raises:
            RuntimeError: If the check fails and `on_fail == "error"`.
        """
        min_version = self.min_transformers_version
        max_version = self.max_transformers_version
        if min_version is None and max_version is None:
            return None

        current_version = TRANSFORMERS_VERSION
        # Only check the base version for the min/max version, otherwise preview
        # models cannot be run because `x.yy.0.dev0`<`x.yy.0`
        installed = Version(Version(current_version).base_version)
        msg = f"`transformers=={current_version}` installed, but `transformers"

        if min_version and installed < Version(min_version):
            bound_check_enabled = check_min_version
            msg += f">={min_version}` is required to run this model."
        elif max_version and installed > Version(max_version):
            bound_check_enabled = check_max_version
            msg += f"<={max_version}` is required to run this model."
        else:
            # The installed version satisfies every bound, so there is nothing
            # to report. Returning here (rather than also consulting
            # `transformers_version_reason`) avoids failing a compatible
            # installation with a message that names no requirement.
            return None

        # A bound is violated. The action is suppressed only if the caller
        # disabled that bound's check AND the requested reason type is not
        # listed for this model.
        reasons = self.transformers_version_reason
        reason_applies = bool(
            check_version_reason and reasons and check_version_reason in reasons
        )
        if not bound_check_enabled and not reason_applies:
            return None

        if reasons:
            for reason_type, reason in reasons.items():
                msg += f" Reason({reason_type}): {reason}"

        if on_fail == "error":
            raise RuntimeError(msg)
        elif on_fail == "skip":
            pytest.skip(msg)

        return msg

    def check_available_online(
        self,
        *,
        on_fail: Literal["error", "skip"],
    ) -> None:
        """
        If the model is not available online, perform the given action.

        Args:
            on_fail: `"error"` raises `RuntimeError`; any other value calls
                `pytest.skip`.

        Raises:
            RuntimeError: If the model is flagged as unavailable and
                `on_fail == "error"`.
        """
        if not self.is_available_online:
            msg = "Model is not available online"

            if on_fail == "error":
                raise RuntimeError(msg)
            else:
                pytest.skip(msg)

190
191
192

# Maps each text-generation architecture name to its example checkpoint.
# Every non-empty model id is joined onto `models_path_prefix`.
_TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AfmoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "arcee-ai/Trinity-Nano-Preview")
    ),
    "ApertusForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "swiss-ai/Apertus-8B-Instruct-2509")
    ),
    "AquilaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/AquilaChat-7B"),
        trust_remote_code=True,
    ),
    "AquilaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/AquilaChat2-7B"),
        trust_remote_code=True,
    ),
    "ArceeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "arcee-ai/AFM-4.5B-Base")
    ),
    "ArcticForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-instruct"),
        trust_remote_code=True,
    ),
    "BaiChuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B"),
        trust_remote_code=True,
    ),
    "BaichuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan2-7B-chat"),
        trust_remote_code=True,
    ),
    "BailingMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-lite-1.5"),
        trust_remote_code=True,
    ),
    "BailingMoeV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-mini-2.0"),
        trust_remote_code=True,
    ),
    "BambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-ai-platform/Bamba-9B-v1"),
        extras={
            "tiny": os.path.join(
                models_path_prefix, "hmellor/tiny-random-BambaForCausalLM"
            )
        },
    ),
    "BloomForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigscience/bloom-560m"),
        {"1b": os.path.join(models_path_prefix, "bigscience/bloomz-1b1")},
    ),
    "ChatGLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/chatglm3-6b"),
        trust_remote_code=True,
        max_transformers_version="4.48",
    ),
    "ChatGLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "thu-coai/ShieldLM-6B-chatglm3"),
        trust_remote_code=True,
    ),
    "CohereForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r-v01"),
        trust_remote_code=True,
    ),
    "Cohere2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r7b-12-2024"),
        trust_remote_code=True,
    ),
    "CwmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/cwm"),
        min_transformers_version="4.58",
    ),
    # FIXME: databricks/dbrx-instruct has been deleted
    "DbrxForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "databricks/dbrx-instruct"),
        is_available_online=False,
    ),
    "DeciLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3_3-Nemotron-Super-49B-v1"),
        trust_remote_code=True,
    ),
    "DeepseekForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-moe-16b-base"),
        trust_remote_code=True,
    ),
    "DeepseekV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V2-Lite-Chat"),
        trust_remote_code=True,
    ),
    "DeepseekV3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3"),
        trust_remote_code=True,
    ),
    "DeepseekV32ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3.2-Exp")
    ),
    "Ernie4_5ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-0.3B-PT")
    ),
    "Ernie4_5_MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT")
    ),
    "ExaoneForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"),
        trust_remote_code=True,
    ),
    "Exaone4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-4.0-32B")
    ),
    "ExaoneMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/K-EXAONE-236B-A23B"),
        min_transformers_version="5.1.0",
    ),
    "Fairseq2LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mgleize/fairseq2-dummy-Llama-3.2-1B")
    ),
    "FalconForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-7b")
    ),
    "FalconH1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/Falcon-H1-0.5B-Base")
    ),
    "FlexOlmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Flex-reddit-2x7B-1T")
    ),
    "GemmaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
    ),
    "Gemma2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-2-9b"),
        extras={"tiny": os.path.join(models_path_prefix, "google/gemma-2-2b-it")},
    ),
    "Gemma3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-1b-it")
    ),
    "Gemma3nForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")
    ),
    "GlmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4-9b-chat-hf")
    ),
    "Glm4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4-9B-0414")
    ),
    "Glm4MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5")
    ),
    "Glm4MoeLiteForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.7-Flash"),
        min_transformers_version="5.0.0",
    ),
    "GPT2LMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai-community/gpt2"),
        {"alias": os.path.join(models_path_prefix, "gpt2")},
    ),
    "GPTBigCodeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder"),
        extras={
            "tiny": os.path.join(models_path_prefix, "bigcode/tiny_starcoder_py"),
            "santacoder": os.path.join(
                models_path_prefix, "bigcode/gpt_bigcode-santacoder"
            ),
        },
    ),
    "GPTJForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Milos/slovak-gpt-j-405M"),
        {"6b": os.path.join(models_path_prefix, "EleutherAI/gpt-j-6b")},
    ),
    "GPTNeoXForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "EleutherAI/pythia-70m"),
        {"1b": os.path.join(models_path_prefix, "EleutherAI/pythia-1.4b")},
    ),
    "GptOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lmsys/gpt-oss-20b-bf16")
    ),
    "GraniteForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm/PowerLM-3b")
    ),
    "GraniteMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm/PowerMoE-3b")
    ),
    "GraniteMoeHybridForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-4.0-tiny-preview")
    ),
    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "ibm-research/moe-7b-1b-active-shared-experts"
        )
    ),
    "Grok1ModelForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hpcai-tech/grok-1"),
        trust_remote_code=True,
    ),
    "Grok1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xai-org/grok-2"),
        trust_remote_code=True,
    ),
    "HunYuanDenseV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-7B-Instruct")
    ),
    "HunYuanMoEV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-A13B-Instruct"),
        trust_remote_code=True,
    ),
    "InternLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm-chat-7b"),
        trust_remote_code=True,
    ),
    "InternLM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-chat-7b"),
        trust_remote_code=True,
    ),
    "InternLM2VEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/Mono-InternVL-2B"),
        trust_remote_code=True,
    ),
    "InternLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm3-8b-instruct"),
        trust_remote_code=True,
    ),
    "JAISLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inceptionai/jais-13b-chat")
    ),
    "Jais2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inceptionai/Jais-2-8B-Chat"),
        min_transformers_version="4.58",
    ),
    "IQuestCoderForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "IQuestLab/IQuest-Coder-V1-40B-Instruct"),
        trust_remote_code=True,
    ),
    "IQuestLoopCoderForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct"
        ),
        trust_remote_code=True,
    ),
    "JambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/AI21-Jamba-1.5-Mini"),
        extras={
            "tiny": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-dev"),
            "random": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-random"),
        },
    ),
    "KimiLinearForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-Linear-48B-A3B-Instruct"),
        trust_remote_code=True,
    ),
    "Lfm2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-1.2B")
    ),
    "Lfm2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-8B-A1B"),
        min_transformers_version="4.58",
    ),
    "LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.2-1B-Instruct"),
        extras={
            "guard": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-3-1B"),
            "hermes": os.path.join(
                models_path_prefix, "NousResearch/Hermes-3-Llama-3.1-8B"
            ),
            "fp8": os.path.join(
                models_path_prefix, "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"
            ),
            "tiny": os.path.join(
                models_path_prefix, "hmellor/tiny-random-LlamaForCausalLM"
            ),
        },
    ),
    "LLaMAForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "decapoda-research/llama-7b-hf"),
        is_available_online=False,
    ),
    "Llama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "LongcatFlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
    ),
    "MambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "state-spaces/mamba-130m-hf")
    ),
    "Mamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mamba-Codestral-7B-v0.1"),
        extras={
            "random": os.path.join(
                models_path_prefix, "yujiepan/mamba2-codestral-v0.1-tiny-random"
            ),
        },
    ),
    "FalconMambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-mamba-7b-instruct")
    ),
    "MiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
        trust_remote_code=True,
    ),
    "MiniCPM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM3-4B"),
        trust_remote_code=True,
    ),
    "MiniCPM4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM4.1-8B"),
        trust_remote_code=True,
    ),
    "MiniMaxForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01-hf")
    ),
    "MiniMaxText01ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01"),
        trust_remote_code=True,
        revision="a59aa9cbc53b9fb8742ca4e9e1531b9802b6fdc3",
    ),
    "MiniMaxM1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M1-40k"),
        trust_remote_code=True,
    ),
    "MiniMaxM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M2"),
        trust_remote_code=True,
    ),
    "MistralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-7B-Instruct-v0.1")
    ),
    "MistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"
        )
    ),
    "MixtralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1"),
        {"tiny": os.path.join(models_path_prefix, "TitanML/tiny-mixtral")},
    ),
    "MptForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mpt"), is_available_online=False
    ),
    # FIXME: mosaicml/mpt-7b has been deleted
    "MPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mosaicml/mpt-7b"),
        is_available_online=False,
    ),
    "NemotronForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Minitron-8B-Base")
    ),
    "NemotronHForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Nemotron-H-8B-Base-8K"),
        trust_remote_code=True,
    ),
    "OlmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMo-1B-hf")
    ),
    "Olmo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMo-2-0425-1B")
    ),
    "Olmo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Olmo-3-7B-Instruct")
    ),
    "OlmoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924-Instruct")
    ),
    "OpenPanguMTPModel": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"
        ),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "OPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/opt-125m"),
        {"1b": os.path.join(models_path_prefix, "facebook/opt-iml-max-1.3b")},
    ),
    "OrionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OrionStarAI/Orion-14B-Chat"),
        trust_remote_code=True,
    ),
    "OuroForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance/Ouro-1.4B"),
        trust_remote_code=True,
    ),
    "PanguEmbeddedForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Embedded-7B-V1.1"
        ),
        trust_remote_code=True,
    ),
    # Placeholder entry: the model id is intentionally left as the empty
    # string given in the original registry.
    "PanguProMoEV2ForCausalLM": _HfExamplesInfo(
        "",
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PanguUltraMoEForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"
        ),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PersimmonForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "adept/persimmon-8b-chat")
    ),
    "PhiForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/phi-2")
    ),
    "Phi3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-mini-4k-instruct")
    ),
    "PhiMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3.5-MoE-instruct"),
        trust_remote_code=True,
    ),
    "Plamo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-2-1b"),
        trust_remote_code=True,
    ),
    "Plamo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-3-nict-2b-base"),
        trust_remote_code=True,
    ),
    "QWenLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
        trust_remote_code=True,
    ),
    "Qwen2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-0.5B-Instruct"),
        extras={
            "2.5": os.path.join(models_path_prefix, "Qwen/Qwen2.5-0.5B-Instruct"),
            "2.5-1.5B": os.path.join(
                models_path_prefix, "Qwen/Qwen2.5-1.5B-Instruct"
            ),
        },
    ),
    "Qwen2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen1.5-MoE-A2.7B-Chat")
    ),
    "Qwen3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B")
    ),
    "Qwen3MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B")
    ),
    "Qwen3NextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        extras={
            "tiny-random": os.path.join(
                models_path_prefix, "tiny-random/qwen3-next-moe"
            )
        },
        min_transformers_version="4.56.3",
    ),
    "RWForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-40b")
    ),
    "SeedOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance-Seed/Seed-OSS-36B-Instruct"),
        trust_remote_code=True,
    ),
    "Step1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/Step-Audio-EditX"),
        trust_remote_code=True,
    ),
    "Step3p5ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step-3.5-flash"),
        is_available_online=False,
    ),
    "SmolLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolLM3-3B")
    ),
    "StableLMEpochForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stabilityai/stablelm-zephyr-3b")
    ),
    "StableLmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stabilityai/stablelm-3b-4e1t")
    ),
    "Starcoder2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder2-3b")
    ),
    "Step3TextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"),
        trust_remote_code=True,
    ),
    "SolarForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "upstage/solar-pro-preview-instruct"),
        trust_remote_code=True,
    ),
    "TeleChatForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "chuhac/TeleChat2-35B"),
        trust_remote_code=True,
    ),
    "TeleChat2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Tele-AI/TeleChat2-3B"),
        trust_remote_code=True,
    ),
    "TeleFLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CofeAI/FLM-2-52B-Instruct-2407"),
        trust_remote_code=True,
    ),
    "XverseForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xverse/XVERSE-7B-Chat"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-2-7b"),
        trust_remote_code=True,
    ),
    "Zamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Zyphra/Zamba2-7B-instruct")
    ),
    "MiMoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
    ),
    "MiMoV2FlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-V2-Flash"),
        trust_remote_code=True,
    ),
    "Dots1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.llm1.inst")
    ),
}

# Maps each embedding / pooling architecture name to its example checkpoint
# and the options recorded for loading it.
_EMBEDDING_EXAMPLE_MODELS = {
    # [Text-only]
    "BertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5")
    ),
    "BgeM3EmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-m3")
    ),
    "Gemma2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-multilingual-gemma2")
    ),
    "Gemma3TextModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/embeddinggemma-300m")
    ),
    "GritLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "parasail-ai/GritLM-7B-vllm")
    ),
    "GteModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-v2.0"),
        trust_remote_code=True,
    ),
    "GteNewModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-base-en-v1.5"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewModel"]},
    ),
    "InternLM2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-1_8b-reward"),
        trust_remote_code=True,
    ),
    "JambaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-reward-dev")
    ),
    "LlamaModel": _HfExamplesInfo(
        "llama",
        is_available_online=False,
    ),
    "LlamaBidirectionalModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/llama-nemotron-embed-1b-v2"),
        trust_remote_code=True,
    ),
    "MistralModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")
    ),
    "ModernBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-modernbert-base"),
        trust_remote_code=True,
    ),
    "NomicBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nomic-ai/nomic-embed-text-v2-moe"),
        trust_remote_code=True,
    ),
    "Qwen2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ssmits/Qwen2-7B-Instruct-embed-base")
    ),
    "Qwen2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-RM-72B"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": (
                "HF model uses remote code that is not compatible with "
                "latest Transformers"
            )
        },
    ),
    "Qwen2ForProcessRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-PRM-7B"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": (
                "HF model uses remote code that is not compatible with "
                "latest Transformers"
            )
        },
    ),
    "RobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/stsb-roberta-base-v2")
    ),
    "RobertaForMaskedLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/all-roberta-large-v1")
    ),
    "XLMRobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/multilingual-e5-small")
    ),
    "BertSpladeSparseEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver/splade-v3"),
        hf_overrides={"architectures": ["BertSpladeSparseEmbeddingModel"]},
    ),
    # [Multimodal]
    "CLIPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/clip-vit-base-patch32")
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "royokong/e5-v")
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full"),
        trust_remote_code=True,
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MrLight/dse-qwen2-2b-mrl-v1")
    ),
    "SiglipModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/siglip-base-patch16-224")
    ),
    "PrithviGeoSpatialMAE": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
    "Terratorch": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
}

601
602
# Example checkpoints for sequence- and token-classification architectures,
# keyed by architecture name.
# NOTE(review): most entries are resolved under `models_path_prefix`; the
# entries given as bare Hub IDs are kept exactly as they were — confirm
# whether they should also be routed through the local prefix.
_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "GPT2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nie3e/sentiment-polish-gpt2-small")
    ),
    # [Cross-encoder]
    "BertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/ms-marco-MiniLM-L-6-v2")
    ),
    "BertForTokenClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "boltuix/NeuroBERT-NER")),
    "GteNewForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-multilingual-reranker-base"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
    ),
    "LlamaBidirectionalForSequenceClassification": _HfExamplesInfo(
        "nvidia/llama-nemotron-rerank-1b-v2", trust_remote_code=True
    ),
    "ModernBertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-reranker-modernbert-base")
    ),
    "ModernBertForTokenClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "disham993/electrical-ner-ModernBERT-base")
    ),
    "RobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/quora-roberta-base")
    ),
    "XLMRobertaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-m3")),
}

631
632
# Example checkpoints for classification architectures that are obtained by
# automatic conversion, keyed by architecture name.
# NOTE(review): the entries given as bare Hub IDs are kept exactly as they
# were — confirm whether they should also be resolved under
# `models_path_prefix`.
_AUTOMATIC_CONVERTED_MODELS = {
    # Use as_seq_cls_model for automatic conversion
    "GemmaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-gemma"),
        hf_overrides={
            "architectures": ["GemmaForSequenceClassification"],
            "classifier_from_token": ["Yes"],
            "method": "no_post_processing",
        },
    ),
    "LlamaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-Reward-V2-Llama-3.2-1B")
    ),
    "Qwen2ForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "jason9693/Qwen2.5-1.5B-apeach")),
    "Qwen3ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tomaarsen/Qwen3-Reranker-0.6B-seq-cls")
    ),
    "Qwen3ForTokenClassification": _HfExamplesInfo("bd2lcco/Qwen3-0.6B-finetuned"),
    "Qwen3VLForSequenceClassification": _HfExamplesInfo(
        "Qwen/Qwen3-VL-Reranker-2B",
        is_available_online=False,
        hf_overrides={
            "architectures": ["Qwen3VLForSequenceClassification"],
            "classifier_from_token": ["no", "yes"],
            "is_original_qwen3_reranker": True,
        },
    ),
}

660
661
# Example checkpoints for multimodal architectures, keyed by architecture
# name.
# NOTE(review): most entries are resolved under `models_path_prefix`; the
# entries (and the Molmo2 "olmo" extra) given as bare Hub IDs are kept exactly
# as they were — confirm whether they should also use the local prefix.
_MULTIMODAL_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AriaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "rhymes-ai/Aria")),
    "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/audio-flamingo-3-hf"), min_transformers_version="5.0.0"
    ),
    "AyaVisionForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "CohereLabs/aya-vision-8b")),
    "BagelForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "ByteDance-Seed/BAGEL-7B-MoT")),
    "BeeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Open-Bee/Bee-8B-RL"),
        trust_remote_code=True,
    ),
    "Blip2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Salesforce/blip2-opt-2.7b"),
        extras={"6b": os.path.join(models_path_prefix, "Salesforce/blip2-opt-6.7b")},
    ),
    "ChameleonForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/chameleon-7b")),
    "Cohere2VisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/command-a-vision-07-2025")
    ),
    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-vl2-tiny"),
        extras={"fork": os.path.join(models_path_prefix, "Isotr0py/deepseek-vl2-tiny")},
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
        hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
    ),
    "DeepseekOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-OCR"),
    ),
    "DotsOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.ocr"), trust_remote_code=True
    ),
    "Eagle2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Eagle2.5-8B"), trust_remote_code=True, is_available_online=False
    ),
    "Emu3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")),
    "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-VL-28B-A3B-PT"),
        trust_remote_code=True,
    ),
    "FuyuForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/fuyu-8b")),
    "Gemma3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-4b-it")),
    "Gemma3nForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")),
    "GlmAsrForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-ASR-Nano-2512"),
        trust_remote_code=True,
        min_transformers_version="5.0.0",
    ),
    "GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"),
    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-speech-3.3-2b")
    ),
    "GLM4VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4v-9b"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GLM4VForCausalLM"]},
    ),
    "Glm4vForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.1V-9B-Thinking")),
    "Glm4vMoeForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.5V")),
    "GlmOcrForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-OCR"),
        is_available_online=False,
        min_transformers_version="5.1.0",
    ),
    "H2OVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-800m"),
        trust_remote_code=True,
        extras={"2b": os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-2b")},
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
    ),
    "HCXVisionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"),
        trust_remote_code=True,
    ),
    "HunYuanVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/HunyuanOCR"),
        hf_overrides={"num_experts": 0},
    ),
    "Idefics3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3"),
        extras={"tiny": os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM-256M-Instruct")},
    ),
    "IsaacForConditionalGeneration": _HfExamplesInfo(
        "PerceptronAI/Isaac-0.1",
        trust_remote_code=True,
        extras={"0.2-2B-Preview": "PerceptronAI/Isaac-0.2-2B-Preview"},
    ),
    "InternS1ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/Intern-S1"), trust_remote_code=True
    ),
    "InternVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
        extras={
            "2B": os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
            "3.0": os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B"),
            "3.5-qwen3": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-1B"),
            "3.5-qwen3moe": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-30B-A3B"),
            "3.5-gptoss": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"),
        },
        trust_remote_code=True,
    ),
    "InternVLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B-hf")),
    "KananaVForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "kakaocorp/kanana-1.5-v-3b-instruct"),
        trust_remote_code=True,
    ),
    "KeyeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-8B-Preview"),
        trust_remote_code=True,
    ),
    "KeyeVL1_5ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-1_5-8B"),
        trust_remote_code=True,
    ),
    "KimiVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Instruct"),
        extras={"thinking": os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Thinking")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason={
            "hf": (
                "HF model uses deprecated transformers API "
                "(PytorchGELUTanh, DynamicCache.seen_tokens, and more). See: "
                "https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/31"
            )
        },
    ),
    "KimiK25ForConditionalGeneration": _HfExamplesInfo(
        "moonshotai/Kimi-K2.5",
        trust_remote_code=True,
        is_available_online=False,
    ),
    "LightOnOCRForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lightonai/LightOnOCR-1B-1025")
    ),
    "Lfm2VlForConditionalGeneration": _HfExamplesInfo(
        "LiquidAI/LFM2-VL-450M",
        min_transformers_version="5.0.0",
    ),
    "Llama4ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
        max_model_len=10240,
        extras={"llama-guard-4": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-4-12B")},
    ),
    "LlavaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
        extras={
            "mistral": os.path.join(models_path_prefix, "mistral-community/pixtral-12b"),
            "mistral-fp8": os.path.join(models_path_prefix, "nm-testing/pixtral-12b-FP8-dynamic"),
        },
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")
    ),
    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")
    ),
    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    ),
    "MantisForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/Mantis-8B-siglip-llama3"),
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
        hf_overrides={"architectures": ["MantisForConditionalGeneration"]},
    ),
    "MiDashengLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mispeech/midashenglm-7b"), trust_remote_code=True
    ),
    "MiniCPMO": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM-o-2_6"), trust_remote_code=True),
    "MiniCPMV": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5"),
        extras={
            "2.6": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-2_6"),
            "4.0": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4"),
            "4.5": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4_5"),
        },
        trust_remote_code=True,
    ),
    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-VL-01"),
        trust_remote_code=True,
    ),
    "Mistral3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Small-3.1-24B-Instruct-2503"),
        extras={"fp8": os.path.join(models_path_prefix, "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic")},
    ),
    "MolmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo-7B-D-0924"),
        max_transformers_version="4.48",
        transformers_version_reason={
            "vllm": "Incorrectly-detected `tensorflow` import from processor."
        },
        extras={"olmo": os.path.join(models_path_prefix, "allenai/Molmo-7B-O-0924")},
        trust_remote_code=True,
    ),
    "Molmo2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo2-8B"),
        extras={"olmo": "allenai/Molmo2-O-7B"},
        min_transformers_version="4.51",
        trust_remote_code=True,
        # required by current PrefixLM implementation
        max_num_batched_tokens=31872,
    ),
    "NVLM_D": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/NVLM-D-72B"), trust_remote_code=True),
    "Llama_Nemotron_Nano_VL": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1"),
        trust_remote_code=True,
    ),
    "NemotronH_Nano_VL_V2": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nano_vl_dummy"), is_available_online=False, trust_remote_code=True
    ),
    "OpenCUAForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xlangai/OpenCUA-7B"), trust_remote_code=True
    ),
    "Ovis": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2-1B"),
        trust_remote_code=True,
        max_transformers_version="4.53",
        transformers_version_reason={"hf": "HF model is not compatible"},
        extras={
            "1.6-llama": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Llama3.2-3B"),
            "1.6-gemma": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Gemma2-9B"),
        },
    ),
    "Ovis2_5": _HfExamplesInfo(os.path.join(models_path_prefix, "AIDC-AI/Ovis2.5-2B"), trust_remote_code=True),
    "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "PaddlePaddle/PaddleOCR-VL"),
        trust_remote_code=True,
    ),
    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/paligemma-3b-mix-224"),
        extras={"v2": os.path.join(models_path_prefix, "google/paligemma2-3b-ft-docci-448")},
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-vision-128k-instruct"),
        trust_remote_code=True,
        max_transformers_version="4.48",
        transformers_version_reason={
            "hf": "HF model use deprecated imports which have been removed."
        },  # noqa: E501
        extras={"phi3.5": os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")},
    ),
    "Phi4MMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct"), trust_remote_code=True
    ),
    "PixtralForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409"),
        extras={
            "mistral-large-3": os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"),
            "ministral-3": os.path.join(models_path_prefix, "mistralai/Ministral-3-3B-Instruct-2512"),
        },
        tokenizer_mode="mistral",
    ),
    "QwenVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-VL"),
        extras={"chat": os.path.join(models_path_prefix, "Qwen/Qwen-VL-Chat")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason={
            "hf": "HF model uses deprecated imports which have been removed."
        },  # noqa: E501
        hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
    ),
    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-Audio-7B-Instruct")
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")),
    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-3B-Instruct"),
        max_model_len=4096,
    ),
    "Qwen2_5OmniModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-3B")),
    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")),
    "Qwen3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-4B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3_5ForConditionalGeneration": _HfExamplesInfo(
        "Qwen/Qwen3.5-9B-Instruct",
        max_model_len=4096,
        min_transformers_version="5.1.0",
    ),
    "Qwen3_5MoeForConditionalGeneration": _HfExamplesInfo(
        "Qwen/Qwen3.5-35B-A3B-Instruct",
        max_model_len=4096,
        min_transformers_version="5.1.0",
    ),
    "Qwen3_5MTP": _HfExamplesInfo(
        "Qwen/Qwen3.5-9B-Instruct",
        speculative_model="Qwen/Qwen3.5-9B-Instruct",
        min_transformers_version="5.1.0",
    ),
    "Qwen3_5MoeMTP": _HfExamplesInfo(
        "Qwen/Qwen3.5-35B-A3B-Instruct",
        speculative_model="Qwen/Qwen3.5-35B-A3B-Instruct",
        min_transformers_version="5.1.0",
    ),
    "Qwen3OmniMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Omni-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "RForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "YannQi/R-4B"), trust_remote_code=True),
    "SkyworkR1VChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B"), trust_remote_code=True
    ),
    "SmolVLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    ),
    "Step3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"), trust_remote_code=True
    ),
    "StepVLForConditionalGeneration": _HfExamplesInfo(
        "stepfun-ai/Step3-VL-10B", trust_remote_code=True
    ),
    "UltravoxModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),
        trust_remote_code=True,
    ),
    "TarsierForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "omni-research/Tarsier-7b")),
    "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier2-Recap-7b"),
        hf_overrides={
            # An architecture override is a class name, not a filesystem
            # path, so it must not be joined with models_path_prefix.
            "architectures": ["Tarsier2ForConditionalGeneration"],
            "model_type": "tarsier2",
        },
    ),
    "VoxtralForConditionalGeneration": _HfExamplesInfo(
        "mistralai/Voxtral-Mini-3B-2507",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    "VoxtralStreamingGeneration": _HfExamplesInfo(
        "<place-holder>",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    # [Encoder-decoder]
    "NemotronParseForConditionalGeneration": _HfExamplesInfo(
        "nvidia/NVIDIA-Nemotron-Parse-v1.1", trust_remote_code=True
    ),
    "WhisperForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/whisper-large-v3-turbo"),
        extras={"v3": os.path.join(models_path_prefix, "openai/whisper-large-v3")},
    ),
    # [Cross-encoder]
    "JinaVLForRanking": _HfExamplesInfo(os.path.join(models_path_prefix, "jinaai/jina-reranker-m0")),
}

1018

1019
# Example target/draft checkpoint pairs for speculative-decoding
# architectures, keyed by architecture name.
# NOTE(review): most entries are resolved under `models_path_prefix`; the
# entries given entirely as bare Hub IDs are kept exactly as they were —
# confirm whether they should also use the local prefix.
_SPECULATIVE_DECODING_EXAMPLE_MODELS = {
    "MedusaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-68m"),
        speculative_model=os.path.join(models_path_prefix, "abhigoyal/vllm-medusa-llama-68m-random"),
    ),
    # Temporarily disabled.
    # TODO(woosuk): Re-enable this once the MLP Speculator is supported in V1.
    # "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(
    #     "JackFram/llama-160m",
    #     speculative_model="ibm-ai-platform/llama-160m-accelerator"
    # ),
    "DeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "luccafong/deepseek_mtp_main_random"),
        speculative_model=os.path.join(models_path_prefix, "luccafong/deepseek_mtp_draft_random"),
        trust_remote_code=True,
    ),
    "EagleDeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "eagle618/deepseek-v3-random"),
        speculative_model=os.path.join(models_path_prefix, "eagle618/eagle-deepseek-v3-random"),
        trust_remote_code=True,
    ),
    "EagleLlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
    ),
    "Eagle3LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        use_original_num_layers=True,
        max_model_len=10240,
    ),
    "EagleMistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512"),
        speculative_model=os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-Eagle"),
        # TODO: revert once figuring out OOM in CI
        is_available_online=False,
    ),
    "LlamaForCausalLMEagle3": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "AngelSlim/Qwen3-8B_eagle3"),
        tokenizer=os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        use_original_num_layers=True,
    ),
    "EagleLlama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "EagleMiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-1B-sft-bf16"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
        # The speculative method is a method name, not a filesystem path, so
        # it must not be joined with models_path_prefix.
        speculative_method="eagle",
        tokenizer=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
    ),
    "ErnieMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
    ),
    "ExaoneMoeMTP": _HfExamplesInfo(
        "LGAI-EXAONE/K-EXAONE-236B-A23B",
        speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B",
        min_transformers_version="5.1.0",
    ),
    "Glm4MoeMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
        # Same checkpoint as the target model, so resolve it from the same
        # local prefix instead of a bare Hub ID.
        speculative_model=os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
    ),
    "Glm4MoeLiteMTPModel": _HfExamplesInfo(
        "zai-org/GLM-4.7-Flash",
        speculative_model="zai-org/GLM-4.7-Flash",
        min_transformers_version="5.0.0",
    ),
    "GlmOcrMTPModel": _HfExamplesInfo(
        "zai-org/GLM-OCR",
        speculative_model="zai-org/GLM-OCR",
        is_available_online=False,
        min_transformers_version="5.1.0",
    ),
    "LongCatFlashMTPModel": _HfExamplesInfo(
        # Join with the prefix exactly once; a nested join repeats the
        # prefix whenever models_path_prefix is a relative path.
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
    ),
    "MiMoMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
    ),
    "Eagle3Qwen2_5vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-7B-Instruct"),
        speculative_model=os.path.join(models_path_prefix, "Rayzl/qwen2.5-vl-7b-eagle3-sgl"),
    ),
    "Eagle3Qwen3vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-8B-Instruct"),
        speculative_model=os.path.join(models_path_prefix, "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3"),
    ),
    "Qwen3NextMTP": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"), min_transformers_version="4.56.3"
    ),
    "Step3p5MTP": _HfExamplesInfo(
        "stepfun-ai/Step-3.5-Flash",
        trust_remote_code=True,
        speculative_model="stepfun-ai/Step-3.5-Flash",
        is_available_online=False,
    ),
}

1133
# Example checkpoints for the `Transformers*` architectures, keyed by
# architecture name. Every entry is resolved under `models_path_prefix`.
_TRANSFORMERS_BACKEND_MODELS = {
    "TransformersEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5"), min_transformers_version="5.0.0"
    ),
    "TransformersForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "papluca/xlm-roberta-base-language-detection"),
        min_transformers_version="5.0.0",
    ),
    "TransformersForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hmellor/Ilama-3.2-1B"), trust_remote_code=True
    ),
    "TransformersMultiModalForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")),
    "TransformersMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924"), min_transformers_version="5.0.0"
    ),
    "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"), min_transformers_version="5.0.0"
    ),
    "TransformersMoEEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"), min_transformers_version="5.0.0"
    ),
    "TransformersMoEForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"), min_transformers_version="5.0.0"
    ),
    "TransformersMultiModalEmbeddingModel": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-4b-it")),
    "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
}

1163
1164
1165
# Union of the per-category registries. The dicts are unpacked in order, so
# when an architecture name appears in more than one category the entry from
# the later dict replaces the earlier one.
_EXAMPLE_MODELS = {
    **_TEXT_GENERATION_EXAMPLE_MODELS,
    **_EMBEDDING_EXAMPLE_MODELS,
    **_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS,
    **_MULTIMODAL_EXAMPLE_MODELS,
    **_SPECULATIVE_DECODING_EXAMPLE_MODELS,
    **_TRANSFORMERS_BACKEND_MODELS,
}


class HfExampleModels:
    """Lookup helper around a mapping of architecture name to example info."""

    def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None:
        """Store the registry.

        Args:
            hf_models: Mapping from architecture name to its example info.
                The mapping is kept by reference, not copied.
        """
        super().__init__()

        self.hf_models = hf_models

    def get_supported_archs(self) -> Set[str]:
        """Return the architecture names, i.e. the keys of the registry."""
        return self.hf_models.keys()

    def get_hf_info(self, model_arch: str) -> _HfExamplesInfo:
        """Return the example info registered for ``model_arch``.

        Raises:
            ValueError: If no entry exists for ``model_arch``.
        """
        try:
            return self.hf_models[model_arch]
        except KeyError:
            # `from None` hides the internal KeyError from the traceback.
            raise ValueError(
                f"No example model defined for {model_arch}; please update this file."
            ) from None

    def find_hf_info(self, model_id: str) -> _HfExamplesInfo:
        """Return the first entry that lists ``model_id``.

        Entries whose ``default`` equals ``model_id`` take priority over
        entries that only list it among their ``extras`` values; within each
        pass the registry's iteration order decides.

        Raises:
            ValueError: If ``model_id`` is neither a default nor an extra of
                any entry.
        """
        for info in self.hf_models.values():
            if info.default == model_id:
                return info

        # Fallback to extras
        for info in self.hf_models.values():
            if any(extra == model_id for extra in info.extras.values()):
                return info

        raise ValueError(
            f"No example model defined for {model_id}; please update this file."
        )
1203

1204

# Registry over every example model defined in this file.
HF_EXAMPLE_MODELS = HfExampleModels(_EXAMPLE_MODELS)
# Registry restricted to the automatically converted classification models.
AUTO_EXAMPLE_MODELS = HfExampleModels(_AUTOMATIC_CONVERTED_MODELS)