registry.py 56.2 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from collections.abc import Mapping, Set
5
from dataclasses import dataclass, field
6
from typing import Any, Literal
7

zhuwenwen's avatar
zhuwenwen committed
8
import os
9
10
11
import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
zhuwenwen's avatar
zhuwenwen committed
12
# from ..utils import models_path_prefix
13

zhuwenwen's avatar
zhuwenwen committed
14
# Directory prefix that is joined onto every example checkpoint name below.
# `VLLM_OPTEST_MODELS_PATH` takes precedence over `OPTEST_MODELS_PATH`.
# Fall back to an empty string when neither is set: `os.path.join("", name)`
# returns `name` unchanged, whereas a `None` prefix would raise `TypeError`
# while this module is being imported.
models_path_prefix = (
    os.getenv("VLLM_OPTEST_MODELS_PATH") or os.getenv("OPTEST_MODELS_PATH") or ""
)
15

16
from vllm.config.model import ModelDType, TokenizerMode
17

zhuwenwen's avatar
zhuwenwen committed
18

19
20
21
22
23
24
25
26
@dataclass(frozen=True)
class _HfExamplesInfo:
    """Example checkpoint(s) and loading options for testing one architecture.

    Besides the checkpoint names, the fields carry per-model loading options
    and the HF Transformers version constraints. The two ``check_*`` methods
    turn those constraints into an error, a pytest skip, or a message.
    """

    default: str
    """The default model to use for testing this architecture."""

    extras: Mapping[str, str] = field(default_factory=dict)
    """Extra models to use for testing this architecture."""

    tokenizer: str | None = None
    """Set the tokenizer to load for this architecture."""

    tokenizer_mode: TokenizerMode | str = "auto"
    """Set the tokenizer type for this architecture."""

    speculative_model: str | None = None
    """
    The default model to use for testing this architecture, which is only used
    for speculative decoding.
    """

    speculative_method: str | None = None
    """
    The method to use for speculative decoding.
    """

    min_transformers_version: str | None = None
    """
    The minimum version of HF Transformers that is required to run this model.
    """

    max_transformers_version: str | None = None
    """
    The maximum version of HF Transformers that this model runs on.
    """

    transformers_version_reason: dict[Literal["vllm", "hf"], str] | None = None
    """
    The type and reason to skip test for the minimum/maximum version requirement.
    vllm: skip all vLLM tests if the version requirement is not met.
    hf: only skip tests that uses HF runner if the version requirement is not met.
    """

    require_embed_inputs: bool = False
    """
    If `True`, enables prompt and multi-modal embedding inputs while
    disabling tokenization.
    """

    dtype: ModelDType = "auto"
    """
    The data type for the model weights and activations.
    """

    enforce_eager: bool = False
    """
    Whether to enforce eager execution. If True, we will
    disable CUDA graph and always execute the model in eager mode.
    If False, we will use CUDA graph and eager execution in hybrid.
    """

    is_available_online: bool = True
    """
    Set this to `False` if the name of this architecture no longer exists on
    the HF repo. To maintain backwards compatibility, we have not removed them
    from the main model registry, so without this flag the registry tests will
    fail.
    """

    trust_remote_code: bool = False
    """The `trust_remote_code` level required to load the model."""

    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The `hf_overrides` required to load the model."""

    max_model_len: int | None = None
    """
    The maximum model length to use for this model. Some models default to a
    length that is too large to fit into memory in CI.
    """

    max_num_batched_tokens: int | None = None
    """
    The maximum number of tokens to be processed in a single batch.
    """

    revision: str | None = None
    """
    The specific revision (commit hash, tag, or branch) to use for the model.
    If not specified, the default revision will be used.
    """

    max_num_seqs: int | None = None
    """Maximum number of sequences to be processed in a single iteration."""

    use_original_num_layers: bool = False
    """
    If True, use the original number of layers from the model config
    instead of minimal layers for testing.
    """

    def check_transformers_version(
        self,
        *,
        on_fail: Literal["error", "skip", "return"],
        check_version_reason: Literal["vllm", "hf"] = "hf",
        check_min_version: bool = True,
        check_max_version: bool = True,
    ) -> str | None:
        """
        If the installed transformers version does not meet the requirements,
        perform the given action.

        The check fails only when an enforced bound is violated and the
        violation applies to the requested runner. It applies when
        `transformers_version_reason` is unset or empty, or when it has an
        entry for `check_version_reason`.

        Args:
            on_fail: `"error"` raises, `"skip"` calls `pytest.skip`, any other
                value returns the message.
            check_version_reason: The runner (`"vllm"` or `"hf"`) on whose
                behalf the check is made.
            check_min_version: Whether a violated minimum version counts.
            check_max_version: Whether a violated maximum version counts.

        Returns:
            `None` when the requirement is met or does not apply, otherwise
            the failure message (only reachable when `on_fail` is neither
            `"error"` nor `"skip"`).

        Raises:
            RuntimeError: If the check fails and `on_fail == "error"`.
        """
        min_version = self.min_transformers_version
        max_version = self.max_transformers_version
        if min_version is None and max_version is None:
            return None

        current_version = TRANSFORMERS_VERSION
        # Only check the base version for the min/max version, otherwise preview
        # models cannot be run because `x.yy.0.dev0`<`x.yy.0`
        installed = Version(Version(current_version).base_version)

        msg = f"`transformers=={current_version}` installed, but `transformers"
        if min_version and installed < Version(min_version):
            bound_enforced = check_min_version
            msg += f">={min_version}` is required to run this model."
        elif max_version and installed > Version(max_version):
            bound_enforced = check_max_version
            msg += f"<={max_version}` is required to run this model."
        else:
            # The installed version is inside the allowed range: nothing to
            # report, whatever reasons are registered for the bounds.
            return None

        if not bound_enforced:
            return None

        # When reasons are registered per runner, a violated bound only
        # affects the runners that are listed; other runners keep running.
        reasons = self.transformers_version_reason
        if reasons and check_version_reason and check_version_reason not in reasons:
            return None

        if reasons:
            for reason_type, reason in reasons.items():
                msg += f" Reason({reason_type}): {reason}"

        if on_fail == "error":
            raise RuntimeError(msg)
        elif on_fail == "skip":
            pytest.skip(msg)

        return msg

    def check_available_online(
        self,
        *,
        on_fail: Literal["error", "skip"],
    ) -> None:
        """
        If the model is not available online, perform the given action.

        Args:
            on_fail: `"error"` raises `RuntimeError`; any other value calls
                `pytest.skip`.

        Raises:
            RuntimeError: If the model is flagged as unavailable and
                `on_fail == "error"`.
        """
        if self.is_available_online:
            return

        msg = "Model is not available online"
        if on_fail == "error":
            raise RuntimeError(msg)
        pytest.skip(msg)

190
191
192

# Maps each text-generation architecture name to its example checkpoint info.
# Checkpoint names are joined onto `models_path_prefix`; every architecture
# name appears exactly once.
_TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AfmoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "arcee-ai/Trinity-Nano-Preview")),
    "ApertusForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "swiss-ai/Apertus-8B-Instruct-2509")),
    "AquilaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat-7B"), trust_remote_code=True),
    "AquilaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat2-7B"), trust_remote_code=True),
    "ArceeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "arcee-ai/AFM-4.5B-Base")),
    "ArcticForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-instruct"), trust_remote_code=True
    ),
    "BaiChuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B"), trust_remote_code=True
    ),
    "BaichuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan2-7B-chat"), trust_remote_code=True
    ),
    "BailingMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-lite-1.5"), trust_remote_code=True
    ),
    "BailingMoeV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-mini-2.0"), trust_remote_code=True
    ),
    "BambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-ai-platform/Bamba-9B-v1"),
        extras={"tiny": os.path.join(models_path_prefix, "hmellor/tiny-random-BambaForCausalLM")},
    ),
    "BloomForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigscience/bloom-560m"),
        {"1b": os.path.join(models_path_prefix, "bigscience/bloomz-1b1")},
    ),
    "ChatGLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/chatglm3-6b"), trust_remote_code=True, max_transformers_version="4.48"
    ),
    "ChatGLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "thu-coai/ShieldLM-6B-chatglm3"),
        trust_remote_code=True,
    ),
    "CohereForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r-v01"), trust_remote_code=True
    ),
    "Cohere2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r7b-12-2024"),
        trust_remote_code=True,
    ),
    "CwmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/cwm"), min_transformers_version="4.58"),
    # FIXME: databricks/dbrx-instruct has been deleted
    "DbrxForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "databricks/dbrx-instruct"), is_available_online=False
    ),
    "DeciLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3_3-Nemotron-Super-49B-v1"),
        trust_remote_code=True,
    ),
    "DeepseekForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-moe-16b-base"),
        trust_remote_code=True,
    ),
    "DeepseekV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V2-Lite-Chat"),
        trust_remote_code=True,
    ),
    "DeepseekV3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3"),
        trust_remote_code=True,
    ),
    "DeepseekV32ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3.2-Exp")),
    "Ernie4_5ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baidu/ERNIE-4.5-0.3B-PT")),
    "Ernie4_5_MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT")),
    "ExaoneForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"), trust_remote_code=True
    ),
    "Exaone4ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-4.0-32B")),
    "ExaoneMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/K-EXAONE-236B-A23B"), min_transformers_version="5.1.0"
    ),
    "Fairseq2LlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mgleize/fairseq2-dummy-Llama-3.2-1B")),
    "FalconForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-7b")),
    "FalconH1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/Falcon-H1-0.5B-Base")),
    "FlexOlmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/Flex-reddit-2x7B-1T")),
    "GemmaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")),
    "Gemma2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-2-9b"),
        extras={"tiny": os.path.join(models_path_prefix, "google/gemma-2-2b-it")},
    ),
    "Gemma3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-1b-it")),
    "Gemma3nForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")),
    "GlmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/glm-4-9b-chat-hf")),
    "Glm4ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4-9B-0414")),
    "Glm4MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.5")),
    "Glm4MoeLiteForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.7-Flash"),
        min_transformers_version="5.0.0",
    ),
    "GPT2LMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai-community/gpt2"),
        {"alias": os.path.join(models_path_prefix, "gpt2")},
    ),
    "GPTBigCodeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder"),
        extras={
            "tiny": os.path.join(models_path_prefix, "bigcode/tiny_starcoder_py"),
            "santacoder": os.path.join(models_path_prefix, "bigcode/gpt_bigcode-santacoder"),
        },
    ),
    "GPTJForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Milos/slovak-gpt-j-405M"),
        {"6b": os.path.join(models_path_prefix, "EleutherAI/gpt-j-6b")},
    ),
    "GPTNeoXForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "EleutherAI/pythia-70m"),
        {"1b": os.path.join(models_path_prefix, "EleutherAI/pythia-1.4b")},
    ),
    "GptOssForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "lmsys/gpt-oss-20b-bf16")),
    "GraniteForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm/PowerLM-3b")),
    "GraniteMoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm/PowerMoE-3b")),
    "GraniteMoeHybridForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-4.0-tiny-preview")
    ),
    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-research/moe-7b-1b-active-shared-experts")
    ),
    "Grok1ModelForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hpcai-tech/grok-1"), trust_remote_code=True
    ),
    "Grok1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "xai-org/grok-2"), trust_remote_code=True),
    "HunYuanDenseV1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tencent/Hunyuan-7B-Instruct")),
    "HunYuanMoEV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-A13B-Instruct"), trust_remote_code=True
    ),
    "InternLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm-chat-7b"), trust_remote_code=True
    ),
    "InternLM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-chat-7b"), trust_remote_code=True
    ),
    "InternLM2VEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/Mono-InternVL-2B"), trust_remote_code=True
    ),
    "InternLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm3-8b-instruct"), trust_remote_code=True
    ),
    "JAISLMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "inceptionai/jais-13b-chat")),
    "Jais2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inceptionai/Jais-2-8B-Chat"), min_transformers_version="4.58"
    ),
    "IQuestCoderForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "IQuestLab/IQuest-Coder-V1-40B-Instruct"), trust_remote_code=True
    ),
    "IQuestLoopCoderForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct"), trust_remote_code=True
    ),
    "JambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/AI21-Jamba-1.5-Mini"),
        extras={
            "tiny": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-dev"),
            "random": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-random"),
        },
    ),
    "KimiLinearForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-Linear-48B-A3B-Instruct"), trust_remote_code=True
    ),
    "Lfm2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "LiquidAI/LFM2-1.2B")),
    "Lfm2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-8B-A1B"), min_transformers_version="4.58"
    ),
    "LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.2-1B-Instruct"),
        extras={
            "guard": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-3-1B"),
            "hermes": os.path.join(models_path_prefix, "NousResearch/Hermes-3-Llama-3.1-8B"),
            "fp8": os.path.join(models_path_prefix, "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"),
            "tiny": os.path.join(models_path_prefix, "hmellor/tiny-random-LlamaForCausalLM"),
        },
    ),
    "LLaMAForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "decapoda-research/llama-7b-hf"), is_available_online=False
    ),
    "Llama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "LongcatFlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"), trust_remote_code=True
    ),
    "MambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "state-spaces/mamba-130m-hf")),
    "Mamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mamba-Codestral-7B-v0.1"),
        extras={
            "random": os.path.join(models_path_prefix, "yujiepan/mamba2-codestral-v0.1-tiny-random"),
        },
    ),
    "FalconMambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-mamba-7b-instruct")),
    "MiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"), trust_remote_code=True
    ),
    "MiniCPM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM3-4B"), trust_remote_code=True
    ),
    "MiniCPM4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM4.1-8B"), trust_remote_code=True
    ),
    "MiniMaxForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01-hf")),
    "MiniMaxText01ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01"),
        trust_remote_code=True,
        revision="a59aa9cbc53b9fb8742ca4e9e1531b9802b6fdc3",
    ),
    "MiniMaxM1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M1-40k"), trust_remote_code=True
    ),
    "MiniMaxM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M2"),
        trust_remote_code=True,
    ),
    "MistralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mistral-7B-Instruct-v0.1")),
    "MistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4")
    ),
    "MixtralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1"),
        {"tiny": os.path.join(models_path_prefix, "TitanML/tiny-mixtral")},
    ),
    "MptForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mpt"), is_available_online=False),
    # FIXME: mosaicml/mpt-7b has been deleted
    "MPTForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mosaicml/mpt-7b"), is_available_online=False),
    "NemotronForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/Minitron-8B-Base")),
    "NemotronHForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Nemotron-H-8B-Base-8K"), trust_remote_code=True
    ),
    "OlmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-1B-hf")),
    "Olmo2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-2-0425-1B")),
    "Olmo3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/Olmo-3-7B-Instruct")),
    "OlmoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924-Instruct")),
    "OpenPanguMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "OPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/opt-125m"),
        {"1b": os.path.join(models_path_prefix, "facebook/opt-iml-max-1.3b")},
    ),
    "OrionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OrionStarAI/Orion-14B-Chat"), trust_remote_code=True
    ),
    "OuroForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ByteDance/Ouro-1.4B"), trust_remote_code=True),
    "PanguEmbeddedForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Embedded-7B-V1.1"), trust_remote_code=True
    ),
    "PanguProMoEV2ForCausalLM": _HfExamplesInfo(
        "",
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PanguUltraMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PersimmonForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/persimmon-8b-chat")),
    "PhiForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/phi-2")),
    "Phi3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-mini-4k-instruct")),
    "PhiMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3.5-MoE-instruct"), trust_remote_code=True
    ),
    "Plamo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-2-1b"),
        trust_remote_code=True,
    ),
    "Plamo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-3-nict-2b-base"),
        trust_remote_code=True,
    ),
    "QWenLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
        trust_remote_code=True,
    ),
    "Qwen2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-0.5B-Instruct"),
        extras={
            "2.5": os.path.join(models_path_prefix, "Qwen/Qwen2.5-0.5B-Instruct"),
            "2.5-1.5B": os.path.join(models_path_prefix, "Qwen/Qwen2.5-1.5B-Instruct"),
        },
    ),
    "Qwen2MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen1.5-MoE-A2.7B-Chat")),
    "Qwen3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen3-8B")),
    "Qwen3MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B")),
    "Qwen3NextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        extras={"tiny-random": os.path.join(models_path_prefix, "tiny-random/qwen3-next-moe")},
        min_transformers_version="4.56.3",
    ),
    "RWForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-40b")),
    "SeedOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance-Seed/Seed-OSS-36B-Instruct"),
        trust_remote_code=True,
    ),
    "Step1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/Step-Audio-EditX"), trust_remote_code=True
    ),
    "SmolLM3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceTB/SmolLM3-3B")),
    "StableLMEpochForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-zephyr-3b")),
    "StableLmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-3b-4e1t")),
    "Starcoder2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "bigcode/starcoder2-3b")),
    "Step3TextForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stepfun-ai/step3"), trust_remote_code=True),
    "SolarForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "upstage/solar-pro-preview-instruct"), trust_remote_code=True
    ),
    "TeleChatForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "chuhac/TeleChat2-35B"), trust_remote_code=True
    ),
    "TeleChat2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Tele-AI/TeleChat2-3B"), trust_remote_code=True
    ),
    "TeleFLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CofeAI/FLM-2-52B-Instruct-2407"), trust_remote_code=True
    ),
    "XverseForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xverse/XVERSE-7B-Chat"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-2-7b"),
        trust_remote_code=True,
    ),
    "Zamba2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Zyphra/Zamba2-7B-instruct")),
    "MiMoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"), trust_remote_code=True),
    "MiMoV2FlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-V2-Flash"), trust_remote_code=True
    ),
    "Dots1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "rednote-hilab/dots.llm1.inst")),
}

# Maps each embedding / pooling architecture name to its example checkpoint
# info, grouped into text-only and multimodal entries.
_EMBEDDING_EXAMPLE_MODELS = {
    # [Text-only]
    "BertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5"),
    ),
    "BgeM3EmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-m3"),
    ),
    "Gemma2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-multilingual-gemma2"),
    ),
    "Gemma3TextModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/embeddinggemma-300m"),
    ),
    "GritLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "parasail-ai/GritLM-7B-vllm"),
    ),
    "GteModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-v2.0"),
        trust_remote_code=True,
    ),
    "GteNewModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-base-en-v1.5"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewModel"]},
    ),
    "InternLM2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-1_8b-reward"),
        trust_remote_code=True,
    ),
    "JambaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-reward-dev"),
    ),
    "LlamaModel": _HfExamplesInfo(
        "llama",
        is_available_online=False,
    ),
    "LlamaBidirectionalModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/llama-nemotron-embed-1b-v2"),
        trust_remote_code=True,
    ),
    "MistralModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"),
    ),
    "ModernBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-modernbert-base"),
        trust_remote_code=True,
    ),
    "NomicBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nomic-ai/nomic-embed-text-v2-moe"),
        trust_remote_code=True,
    ),
    "Qwen2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ssmits/Qwen2-7B-Instruct-embed-base"),
    ),
    "Qwen2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-RM-72B"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
    ),
    "Qwen2ForProcessRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-PRM-7B"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
    ),
    "RobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/stsb-roberta-base-v2"),
    ),
    "RobertaForMaskedLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/all-roberta-large-v1"),
    ),
    "XLMRobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/multilingual-e5-small"),
    ),
    "BertSpladeSparseEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver/splade-v3"),
        hf_overrides={"architectures": ["BertSpladeSparseEmbeddingModel"]},
    ),
    # [Multimodal]
    "CLIPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/clip-vit-base-patch32"),
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "royokong/e5-v"),
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full"),
        trust_remote_code=True,
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MrLight/dse-qwen2-2b-mrl-v1"),
    ),
    "SiglipModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/siglip-base-patch16-224"),
    ),
    "PrithviGeoSpatialMAE": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
    "Terratorch": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
}

598
599
# Example checkpoints for sequence-/token-classification architectures,
# keyed by architecture name.
_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "GPT2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "nie3e/sentiment-polish-gpt2-small",
        ),
    ),
    # [Cross-encoder]
    "BertForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "cross-encoder/ms-marco-MiniLM-L-6-v2",
        ),
    ),
    "BertForTokenClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "boltuix/NeuroBERT-NER",
        ),
    ),
    "GteNewForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "Alibaba-NLP/gte-multilingual-reranker-base",
        ),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
    ),
    # NOTE(review): this entry is not joined with models_path_prefix, unlike
    # the other entries in this table -- confirm that is intentional.
    "LlamaBidirectionalForSequenceClassification": _HfExamplesInfo(
        "nvidia/llama-nemotron-rerank-1b-v2",
        trust_remote_code=True,
    ),
    "ModernBertForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "Alibaba-NLP/gte-reranker-modernbert-base",
        ),
    ),
    "ModernBertForTokenClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "disham993/electrical-ner-ModernBERT-base",
        ),
    ),
    "RobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "cross-encoder/quora-roberta-base",
        ),
    ),
    "XLMRobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "BAAI/bge-reranker-v2-m3",
        ),
    ),
}

628
629
# Example checkpoints for architectures that are converted automatically,
# keyed by architecture name.
_AUTOMATIC_CONVERTED_MODELS = {
    # Use as_seq_cls_model for automatic conversion
    "GemmaForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "BAAI/bge-reranker-v2-gemma",
        ),
        hf_overrides={
            "architectures": ["GemmaForSequenceClassification"],
            "classifier_from_token": ["Yes"],
            "method": "no_post_processing",
        },
    ),
    "LlamaForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "Skywork/Skywork-Reward-V2-Llama-3.2-1B",
        ),
    ),
    "Qwen2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "jason9693/Qwen2.5-1.5B-apeach",
        ),
    ),
    "Qwen3ForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "tomaarsen/Qwen3-Reranker-0.6B-seq-cls",
        ),
    ),
    # NOTE(review): the two entries below are not joined with
    # models_path_prefix, unlike the entries above -- confirm that is
    # intentional.
    "Qwen3ForTokenClassification": _HfExamplesInfo(
        "bd2lcco/Qwen3-0.6B-finetuned",
    ),
    "Qwen3VLForSequenceClassification": _HfExamplesInfo(
        "Qwen/Qwen3-VL-Reranker-2B",
        is_available_online=False,
        hf_overrides={
            "architectures": ["Qwen3VLForSequenceClassification"],
            "classifier_from_token": ["no", "yes"],
            "is_original_qwen3_reranker": True,
        },
    ),
}

657
658
# Example checkpoints for multimodal architectures, keyed by architecture
# name. Most model locations are built by joining models_path_prefix with the
# Hugging Face repo id; entries that pass a bare repo id are left as they were.
# NOTE(review): several entries (e.g. GraniteVision, IsaacForConditionalGeneration,
# KimiK25ForConditionalGeneration, Lfm2VlForConditionalGeneration,
# StepVLForConditionalGeneration, VoxtralForConditionalGeneration,
# NemotronParseForConditionalGeneration and the Molmo2 "olmo" extra) are not
# joined with models_path_prefix -- confirm whether that is intentional.
_MULTIMODAL_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AriaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rhymes-ai/Aria")
    ),
    "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/audio-flamingo-3-hf"),
        min_transformers_version="5.0.0",
    ),
    "AyaVisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/aya-vision-8b")
    ),
    "BagelForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance-Seed/BAGEL-7B-MoT")
    ),
    "BeeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Open-Bee/Bee-8B-RL"),
        trust_remote_code=True,
    ),
    "Blip2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Salesforce/blip2-opt-2.7b"),
        extras={"6b": os.path.join(models_path_prefix, "Salesforce/blip2-opt-6.7b")},
    ),
    "ChameleonForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/chameleon-7b")
    ),
    "Cohere2VisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/command-a-vision-07-2025")
    ),
    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-vl2-tiny"),
        extras={
            "fork": os.path.join(models_path_prefix, "Isotr0py/deepseek-vl2-tiny")
        },
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
        hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
    ),
    "DeepseekOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-OCR"),
    ),
    "DotsOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.ocr"),
        trust_remote_code=True,
    ),
    "Eagle2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Eagle2.5-8B"),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "Emu3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")
    ),
    "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-VL-28B-A3B-PT"),
        trust_remote_code=True,
    ),
    "FuyuForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "adept/fuyu-8b")
    ),
    "Gemma3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
    "Gemma3nForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")
    ),
    "GlmAsrForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-ASR-Nano-2512"),
        trust_remote_code=True,
        min_transformers_version="5.0.0",
    ),
    "GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"),
    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-speech-3.3-2b")
    ),
    "GLM4VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4v-9b"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GLM4VForCausalLM"]},
    ),
    "Glm4vForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.1V-9B-Thinking")
    ),
    "Glm4vMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5V")
    ),
    "GlmOcrForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-OCR"),
        is_available_online=False,
        min_transformers_version="5.1.0",
    ),
    "H2OVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-800m"),
        trust_remote_code=True,
        extras={"2b": os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-2b")},
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
    ),
    "HCXVisionForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
        ),
        trust_remote_code=True,
    ),
    "HunYuanVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/HunyuanOCR"),
        hf_overrides={"num_experts": 0},
    ),
    "Idefics3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3"),
        extras={
            "tiny": os.path.join(
                models_path_prefix, "HuggingFaceTB/SmolVLM-256M-Instruct"
            )
        },
    ),
    "IsaacForConditionalGeneration": _HfExamplesInfo(
        "PerceptronAI/Isaac-0.1",
        trust_remote_code=True,
        extras={"0.2-2B-Preview": "PerceptronAI/Isaac-0.2-2B-Preview"},
    ),
    "InternS1ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/Intern-S1"),
        trust_remote_code=True,
    ),
    "InternVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
        extras={
            "2B": os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
            "3.0": os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B"),
            "3.5-qwen3": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-1B"),
            "3.5-qwen3moe": os.path.join(
                models_path_prefix, "OpenGVLab/InternVL3_5-30B-A3B"
            ),
            "3.5-gptoss": os.path.join(
                models_path_prefix, "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"
            ),
        },
        trust_remote_code=True,
    ),
    "InternVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B-hf")
    ),
    "KananaVForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "kakaocorp/kanana-1.5-v-3b-instruct"),
        trust_remote_code=True,
    ),
    "KeyeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-8B-Preview"),
        trust_remote_code=True,
    ),
    "KeyeVL1_5ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-1_5-8B"),
        trust_remote_code=True,
    ),
    "KimiVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Instruct"),
        extras={
            "thinking": os.path.join(
                models_path_prefix, "moonshotai/Kimi-VL-A3B-Thinking"
            )
        },
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason={
            "hf": (
                "HF model uses deprecated transformers API "
                "(PytorchGELUTanh, DynamicCache.seen_tokens, and more). See: "
                "https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/31"
            )
        },
    ),
    "KimiK25ForConditionalGeneration": _HfExamplesInfo(
        "moonshotai/Kimi-K2.5",
        trust_remote_code=True,
        is_available_online=False,
    ),
    "LightOnOCRForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lightonai/LightOnOCR-1B-1025")
    ),
    "Lfm2VlForConditionalGeneration": _HfExamplesInfo(
        "LiquidAI/LFM2-VL-450M",
        min_transformers_version="5.0.0",
    ),
    "Llama4ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
        max_model_len=10240,
        extras={
            "llama-guard-4": os.path.join(
                models_path_prefix, "meta-llama/Llama-Guard-4-12B"
            )
        },
    ),
    "LlavaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
        extras={
            "mistral": os.path.join(
                models_path_prefix, "mistral-community/pixtral-12b"
            ),
            "mistral-fp8": os.path.join(
                models_path_prefix, "nm-testing/pixtral-12b-FP8-dynamic"
            ),
        },
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")
    ),
    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")
    ),
    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    ),
    "MantisForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/Mantis-8B-siglip-llama3"),
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
        hf_overrides={"architectures": ["MantisForConditionalGeneration"]},
    ),
    "MiDashengLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mispeech/midashenglm-7b"),
        trust_remote_code=True,
    ),
    "MiniCPMO": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-o-2_6"),
        trust_remote_code=True,
    ),
    "MiniCPMV": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5"),
        extras={
            "2.6": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-2_6"),
            "4.0": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4"),
            "4.5": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4_5"),
        },
        trust_remote_code=True,
    ),
    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-VL-01"),
        trust_remote_code=True,
    ),
    "Mistral3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
        ),
        extras={
            "fp8": os.path.join(
                models_path_prefix,
                "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic",
            )
        },
    ),
    "MolmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo-7B-D-0924"),
        max_transformers_version="4.48",
        transformers_version_reason={
            "vllm": "Incorrectly-detected `tensorflow` import from processor."
        },
        extras={"olmo": os.path.join(models_path_prefix, "allenai/Molmo-7B-O-0924")},
        trust_remote_code=True,
    ),
    "Molmo2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo2-8B"),
        extras={"olmo": "allenai/Molmo2-O-7B"},
        min_transformers_version="4.51",
        trust_remote_code=True,
        # required by current PrefixLM implementation
        max_num_batched_tokens=31872,
    ),
    "NVLM_D": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/NVLM-D-72B"),
        trust_remote_code=True,
    ),
    "Llama_Nemotron_Nano_VL": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1"),
        trust_remote_code=True,
    ),
    "NemotronH_Nano_VL_V2": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nano_vl_dummy"),
        is_available_online=False,
        trust_remote_code=True,
    ),
    "OpenCUAForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xlangai/OpenCUA-7B"),
        trust_remote_code=True,
    ),
    "Ovis": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2-1B"),
        trust_remote_code=True,
        max_transformers_version="4.53",
        transformers_version_reason={"hf": "HF model is not compatible"},
        extras={
            "1.6-llama": os.path.join(
                models_path_prefix, "AIDC-AI/Ovis1.6-Llama3.2-3B"
            ),
            "1.6-gemma": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Gemma2-9B"),
        },
    ),
    "Ovis2_5": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2.5-2B"),
        trust_remote_code=True,
    ),
    "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "PaddlePaddle/PaddleOCR-VL"),
        trust_remote_code=True,
    ),
    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/paligemma-3b-mix-224"),
        extras={
            "v2": os.path.join(models_path_prefix, "google/paligemma2-3b-ft-docci-448")
        },
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-vision-128k-instruct"),
        trust_remote_code=True,
        max_transformers_version="4.48",
        transformers_version_reason={
            "hf": "HF model use deprecated imports which have been removed."
        },  # noqa: E501
        extras={
            "phi3.5": os.path.join(
                models_path_prefix, "microsoft/Phi-3.5-vision-instruct"
            )
        },
    ),
    "Phi4MMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct"),
        trust_remote_code=True,
    ),
    "PixtralForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409"),
        extras={
            "mistral-large-3": os.path.join(
                models_path_prefix,
                "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4",
            ),
            "ministral-3": os.path.join(
                models_path_prefix, "mistralai/Ministral-3-3B-Instruct-2512"
            ),
        },
        tokenizer_mode="mistral",
    ),
    "QwenVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-VL"),
        extras={"chat": os.path.join(models_path_prefix, "Qwen/Qwen-VL-Chat")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason={
            "hf": "HF model uses deprecated imports which have been removed."
        },  # noqa: E501
        hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
    ),
    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-Audio-7B-Instruct")
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")
    ),
    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-3B-Instruct"),
        max_model_len=4096,
    ),
    "Qwen2_5OmniModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-3B")
    ),
    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")
    ),
    "Qwen3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-4B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3OmniMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Omni-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "RForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "YannQi/R-4B"),
        trust_remote_code=True,
    ),
    "SkyworkR1VChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B"),
        trust_remote_code=True,
    ),
    "SmolVLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    ),
    "Step3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"),
        trust_remote_code=True,
    ),
    "StepVLForConditionalGeneration": _HfExamplesInfo(
        "stepfun-ai/Step3-VL-10B", trust_remote_code=True
    ),
    "UltravoxModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),
        trust_remote_code=True,
    ),
    "TarsierForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier-7b")
    ),
    "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier2-Recap-7b"),
        hf_overrides={
            # An architecture is a class name, not a filesystem location, so
            # it must not be joined with models_path_prefix.
            "architectures": ["Tarsier2ForConditionalGeneration"],
            "model_type": "tarsier2",
        },
    ),
    "VoxtralForConditionalGeneration": _HfExamplesInfo(
        "mistralai/Voxtral-Mini-3B-2507",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    "VoxtralStreamingGeneration": _HfExamplesInfo(
        "<place-holder>",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    # [Encoder-decoder]
    "NemotronParseForConditionalGeneration": _HfExamplesInfo(
        "nvidia/NVIDIA-Nemotron-Parse-v1.1", trust_remote_code=True
    ),
    "WhisperForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/whisper-large-v3-turbo"),
        extras={"v3": os.path.join(models_path_prefix, "openai/whisper-large-v3")},
    ),
    # [Cross-encoder]
    "JinaVLForRanking": _HfExamplesInfo(
        os.path.join(models_path_prefix, "jinaai/jina-reranker-m0")
    ),
}

995

996
# Example target/draft checkpoint pairs for speculative-decoding
# architectures, keyed by architecture name. The positional argument is the
# target model; speculative_model is the draft model.
# NOTE(review): ExaoneMoeMTP, Glm4MoeLiteMTPModel and GlmOcrMTPModel pass bare
# repo ids for both models (no models_path_prefix) -- confirm whether that is
# intentional.
_SPECULATIVE_DECODING_EXAMPLE_MODELS = {
    "MedusaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-68m"),
        speculative_model=os.path.join(
            models_path_prefix, "abhigoyal/vllm-medusa-llama-68m-random"
        ),
    ),
    # Temporarily disabled.
    # TODO(woosuk): Re-enable this once the MLP Speculator is supported in V1.
    # "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(
    #     "JackFram/llama-160m",
    #     speculative_model="ibm-ai-platform/llama-160m-accelerator"
    # ),
    "DeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "luccafong/deepseek_mtp_main_random"),
        speculative_model=os.path.join(
            models_path_prefix, "luccafong/deepseek_mtp_draft_random"
        ),
        trust_remote_code=True,
    ),
    "EagleDeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "eagle618/deepseek-v3-random"),
        speculative_model=os.path.join(
            models_path_prefix, "eagle618/eagle-deepseek-v3-random"
        ),
        trust_remote_code=True,
    ),
    "EagleLlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"
        ),
        tokenizer=os.path.join(
            models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"
        ),
    ),
    "Eagle3LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
        ),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        use_original_num_layers=True,
        max_model_len=10240,
    ),
    "EagleMistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512"
        ),
        speculative_model=os.path.join(
            models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-Eagle"
        ),
        # TODO: revert once figuring out OOM in CI
        is_available_online=False,
    ),
    "LlamaForCausalLMEagle3": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "AngelSlim/Qwen3-8B_eagle3"),
        tokenizer=os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        use_original_num_layers=True,
    ),
    "EagleLlama4ForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"
        ),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"
        ),
        tokenizer=os.path.join(
            models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"
        ),
    ),
    "EagleMiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-1B-sft-bf16"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"
        ),
        # The speculative method is an identifier, not a filesystem location,
        # so it must not be joined with models_path_prefix.
        speculative_method="eagle",
        tokenizer=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
    ),
    "ErnieMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"
        ),
    ),
    "ExaoneMoeMTP": _HfExamplesInfo(
        "LGAI-EXAONE/K-EXAONE-236B-A23B",
        speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B",
        min_transformers_version="5.1.0",
    ),
    "Glm4MoeMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
        # Same checkpoint as the target model, so resolve it from the same
        # location instead of mixing a local path with a bare repo id.
        speculative_model=os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
    ),
    "Glm4MoeLiteMTPModel": _HfExamplesInfo(
        "zai-org/GLM-4.7-Flash",
        speculative_model="zai-org/GLM-4.7-Flash",
        min_transformers_version="5.0.0",
    ),
    "GlmOcrMTPModel": _HfExamplesInfo(
        "zai-org/GLM-OCR",
        speculative_model="zai-org/GLM-OCR",
        is_available_online=False,
        min_transformers_version="5.1.0",
    ),
    "LongCatFlashMTPModel": _HfExamplesInfo(
        # Join the prefix exactly once; a nested join would repeat the prefix
        # whenever models_path_prefix is a relative path.
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
        speculative_model=os.path.join(
            models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"
        ),
    ),
    "MiMoMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
    ),
    "Eagle3Qwen2_5vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-7B-Instruct"),
        speculative_model=os.path.join(
            models_path_prefix, "Rayzl/qwen2.5-vl-7b-eagle3-sgl"
        ),
    ),
    "Eagle3Qwen3vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-8B-Instruct"),
        speculative_model=os.path.join(
            models_path_prefix, "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3"
        ),
    ),
    "Qwen3NextMTP": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        min_transformers_version="4.56.3",
    ),
}

1104
# Example checkpoints for the "Transformers*" architectures, keyed by
# architecture name. Every location is models_path_prefix joined with the
# Hugging Face repo id.
_TRANSFORMERS_BACKEND_MODELS = {
    "TransformersEmbeddingModel": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "BAAI/bge-base-en-v1.5",
        ),
        min_transformers_version="5.0.0",
    ),
    "TransformersForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "papluca/xlm-roberta-base-language-detection",
        ),
        min_transformers_version="5.0.0",
    ),
    "TransformersForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "hmellor/Ilama-3.2-1B",
        ),
        trust_remote_code=True,
    ),
    "TransformersMultiModalForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "BAAI/Emu3-Chat-hf",
        ),
    ),
    "TransformersMoEForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "allenai/OLMoE-1B-7B-0924",
        ),
        min_transformers_version="5.0.0",
    ),
    "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "Qwen/Qwen3-VL-30B-A3B-Instruct",
        ),
        min_transformers_version="5.0.0",
    ),
    "TransformersMoEEmbeddingModel": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "Qwen/Qwen3-30B-A3B",
        ),
        min_transformers_version="5.0.0",
    ),
    "TransformersMoEForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "Qwen/Qwen3-30B-A3B",
        ),
        min_transformers_version="5.0.0",
    ),
    "TransformersMultiModalEmbeddingModel": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "google/gemma-3-4b-it",
        ),
    ),
    "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "google/gemma-3-4b-it",
        ),
    ),
}

1134
1135
1136
# Single registry combining the per-category tables. The tables are merged
# left to right, so when an architecture name appears in more than one table
# the entry from the right-most table is the one kept.
_EXAMPLE_MODELS = (
    _TEXT_GENERATION_EXAMPLE_MODELS
    | _EMBEDDING_EXAMPLE_MODELS
    | _SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS
    | _MULTIMODAL_EXAMPLE_MODELS
    | _SPECULATIVE_DECODING_EXAMPLE_MODELS
    | _TRANSFORMERS_BACKEND_MODELS
)


class HfExampleModels:
    """Lookup helper around a mapping of architecture name to example info."""

    def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None:
        """Store ``hf_models`` (architecture name -> example info) as-is."""
        super().__init__()

        self.hf_models = hf_models

    def get_supported_archs(self) -> Set[str]:
        """Return the keys view of the underlying mapping."""
        arch_names = self.hf_models.keys()
        return arch_names

    def get_hf_info(self, model_arch: str) -> _HfExamplesInfo:
        """Return the example info registered under ``model_arch``.

        Raises:
            ValueError: If the mapping has no entry for ``model_arch``.
        """
        try:
            found = self.hf_models[model_arch]
        except KeyError:
            # `from None` hides the KeyError so only the ValueError is shown.
            raise ValueError(
                f"No example model defined for {model_arch}; please update this file."
            ) from None
        return found

    def find_hf_info(self, model_id: str) -> _HfExamplesInfo:
        """Return the first example info that refers to ``model_id``.

        Every entry's ``default`` is checked before any entry's ``extras``,
        so a default match always wins over an extras match.

        Raises:
            ValueError: If no entry's default or extras equal ``model_id``.
        """
        default_match = next(
            (
                entry
                for entry in self.hf_models.values()
                if entry.default == model_id
            ),
            None,
        )
        if default_match is not None:
            return default_match

        # Fallback to extras
        for entry in self.hf_models.values():
            for extra_id in entry.extras.values():
                if extra_id == model_id:
                    return entry

        raise ValueError(
            f"No example model defined for {model_id}; please update this file."
        )
1174

1175

Patrick von Platen's avatar
Patrick von Platen committed
1176
# Lookup over the combined example-model registry.
HF_EXAMPLE_MODELS = HfExampleModels(hf_models=_EXAMPLE_MODELS)
# Lookup restricted to the automatically converted architectures.
AUTO_EXAMPLE_MODELS = HfExampleModels(hf_models=_AUTOMATIC_CONVERTED_MODELS)