# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

4
from collections.abc import Mapping, Set
5
from dataclasses import dataclass, field
6
from typing import Any, Literal
7

zhuwenwen's avatar
zhuwenwen committed
8
import os
9
10
11
import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
zhuwenwen's avatar
zhuwenwen committed
12
# from ..utils import models_path_prefix
13

zhuwenwen's avatar
zhuwenwen committed
14
# Root directory that the model names in the registries below are joined onto.
# Read from `VLLM_OPTEST_MODELS_PATH`, then `OPTEST_MODELS_PATH`. When neither
# is set, fall back to "" so that `os.path.join(models_path_prefix, name)`
# yields the bare `name` instead of raising `TypeError` on `None` at import.
models_path_prefix = (
    os.getenv("VLLM_OPTEST_MODELS_PATH") or os.getenv("OPTEST_MODELS_PATH") or ""
)
15

16
from vllm.config.model import ModelDType, TokenizerMode
17

zhuwenwen's avatar
zhuwenwen committed
18

19
20
21
22
23
24
25
26
@dataclass(frozen=True)
class _HfExamplesInfo:
    """
    Immutable description of the example checkpoint(s) used to test one model
    architecture, together with the settings needed to load it and the
    conditions under which tests for it should be skipped.
    """

    default: str
    """The default model to use for testing this architecture."""

    extras: Mapping[str, str] = field(default_factory=dict)
    """Extra models to use for testing this architecture."""

    tokenizer: str | None = None
    """Set the tokenizer to load for this architecture."""

    tokenizer_mode: TokenizerMode | str = "auto"
    """Set the tokenizer type for this architecture."""

    speculative_model: str | None = None
    """
    The default model to use for testing this architecture, which is only used
    for speculative decoding.
    """

    speculative_method: str | None = None
    """
    The method to use for speculative decoding.
    """

    min_transformers_version: str | None = None
    """
    The minimum version of HF Transformers that is required to run this model.
    """

    max_transformers_version: str | None = None
    """
    The maximum version of HF Transformers that this model runs on.
    """

    transformers_version_reason: dict[Literal["vllm", "hf"], str] | None = None
    """
    The type and reason to skip test for the minimum/maximum version requirement.
    vllm: skip all vLLM tests if the version requirement is not met.
    hf: only skip tests that uses HF runner if the version requirement is not met.
    """

    require_embed_inputs: bool = False
    """
    If `True`, enables prompt and multi-modal embedding inputs while
    disabling tokenization.
    """

    dtype: ModelDType = "auto"
    """
    The data type for the model weights and activations.
    """

    enforce_eager: bool = False
    """
    Whether to enforce eager execution. If True, we will
    disable CUDA graph and always execute the model in eager mode.
    If False, we will use CUDA graph and eager execution in hybrid.
    """

    is_available_online: bool = True
    """
    Set this to `False` if the name of this architecture no longer exists on
    the HF repo. To maintain backwards compatibility, we have not removed them
    from the main model registry, so without this flag the registry tests will
    fail.
    """

    trust_remote_code: bool = False
    """The `trust_remote_code` level required to load the model."""

    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The `hf_overrides` required to load the model."""

    max_model_len: int | None = None
    """
    The maximum model length to use for this model. Some models default to a
    length that is too large to fit into memory in CI.
    """

    max_num_batched_tokens: int | None = None
    """
    The maximum number of tokens to be processed in a single batch.
    """

    revision: str | None = None
    """
    The specific revision (commit hash, tag, or branch) to use for the model.
    If not specified, the default revision will be used.
    """

    max_num_seqs: int | None = None
    """Maximum number of sequences to be processed in a single iteration."""

    use_original_num_layers: bool = False
    """
    If True, use the original number of layers from the model config
    instead of minimal layers for testing.
    """

    def check_transformers_version(
        self,
        *,
        on_fail: Literal["error", "skip", "return"],
        check_version_reason: Literal["vllm", "hf"] = "hf",
        check_min_version: bool = True,
        check_max_version: bool = True,
    ) -> str | None:
        """
        If the installed transformers version does not meet the requirements,
        perform the given action.

        The check fails only when the installed version violates an enabled
        min/max bound AND that bound applies to the runner being checked.
        A bound applies when `transformers_version_reason` is unset/empty, or
        when it contains `check_version_reason` as a key. A version inside the
        allowed range never fails, whatever reasons are recorded.

        Args:
            on_fail: `"error"` raises, `"skip"` calls `pytest.skip`, and
                `"return"` hands the failure message back to the caller.
            check_version_reason: Which runner ("vllm" or "hf") the caller is
                about to exercise.
            check_min_version: Whether `min_transformers_version` is enforced.
            check_max_version: Whether `max_transformers_version` is enforced.

        Returns:
            `None` when the requirements are met (or do not apply), otherwise
            the failure message when `on_fail == "return"`.

        Raises:
            RuntimeError: If the check fails and `on_fail == "error"`.
        """
        min_version = self.min_transformers_version
        max_version = self.max_transformers_version
        if min_version is None and max_version is None:
            return None

        current_version = TRANSFORMERS_VERSION
        # Only check the base version for the min/max version, otherwise preview
        # models cannot be run because `x.yy.0.dev0`<`x.yy.0`
        cur_version = Version(Version(current_version).base_version)

        msg = f"`transformers=={current_version}` installed, but `transformers"
        if min_version and cur_version < Version(min_version):
            is_version_valid = not check_min_version
            msg += f">={min_version}` is required to run this model."
        elif max_version and cur_version > Version(max_version):
            is_version_valid = not check_max_version
            msg += f"<={max_version}` is required to run this model."
        else:
            is_version_valid = True

        if is_version_valid:
            return None

        # The version bound is violated. If reasons are recorded but none of
        # them targets the runner under test, that runner is unaffected.
        reasons = self.transformers_version_reason
        if check_version_reason and reasons and check_version_reason not in reasons:
            return None

        if reasons:
            for reason_type, reason in reasons.items():
                msg += f" Reason({reason_type}): {reason}"

        if on_fail == "error":
            raise RuntimeError(msg)
        elif on_fail == "skip":
            pytest.skip(msg)

        return msg

    def check_available_online(
        self,
        *,
        on_fail: Literal["error", "skip"],
    ) -> None:
        """
        If the model is not available online, perform the given action.

        Args:
            on_fail: `"error"` raises `RuntimeError`; any other value calls
                `pytest.skip`.

        Raises:
            RuntimeError: If the model is flagged as unavailable and
                `on_fail == "error"`.
        """
        if not self.is_available_online:
            msg = "Model is not available online"

            if on_fail == "error":
                raise RuntimeError(msg)
            else:
                pytest.skip(msg)

190
191
192

# Maps each text-generation architecture name to the example checkpoint(s)
# used to test it. Every model name is joined onto `models_path_prefix`.
# Each architecture must appear exactly once: a repeated key in a dict
# literal silently replaces the earlier entry.
_TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AfmoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "arcee-ai/Trinity-Nano-Preview")),
    "ApertusForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "swiss-ai/Apertus-8B-Instruct-2509")),
    "AquilaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat-7B"), trust_remote_code=True),
    "AquilaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat2-7B"), trust_remote_code=True),
    "ArceeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "arcee-ai/AFM-4.5B-Base")),
    "ArcticForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-instruct"), trust_remote_code=True
    ),
    "BaiChuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B"), trust_remote_code=True
    ),
    "BaichuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan2-7B-chat"), trust_remote_code=True
    ),
    "BailingMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-lite-1.5"), trust_remote_code=True
    ),
    "BailingMoeV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-mini-2.0"), trust_remote_code=True
    ),
    "BambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-ai-platform/Bamba-9B-v1"),
        extras={"tiny": os.path.join(models_path_prefix, "hmellor/tiny-random-BambaForCausalLM")},
    ),
    "BloomForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigscience/bloom-560m"),
        {"1b": os.path.join(models_path_prefix, "bigscience/bloomz-1b1")},
    ),
    "ChatGLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/chatglm3-6b"), trust_remote_code=True, max_transformers_version="4.48"
    ),
    "ChatGLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "thu-coai/ShieldLM-6B-chatglm3"),
        trust_remote_code=True,
    ),
    "CohereForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r-v01"), trust_remote_code=True
    ),
    "Cohere2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r7b-12-2024"),
        trust_remote_code=True,
    ),
    "CwmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/cwm"), min_transformers_version="4.58"),
    # FIXME: databricks/dbrx-instruct has been deleted
    "DbrxForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "databricks/dbrx-instruct"), is_available_online=False
    ),
    "DeciLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3_3-Nemotron-Super-49B-v1"),
        trust_remote_code=True,
    ),
    "DeepseekForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-moe-16b-base"),
        trust_remote_code=True,
    ),
    "DeepseekV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V2-Lite-Chat"),
        trust_remote_code=True,
    ),
    "DeepseekV3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3"),
        trust_remote_code=True,
    ),
    "DeepseekV32ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3.2-Exp")),
    "Ernie4_5ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baidu/ERNIE-4.5-0.3B-PT")),
    "Ernie4_5_MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT")),
    "ExaoneForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"), trust_remote_code=True
    ),
    "Exaone4ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-4.0-32B")),
    "ExaoneMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/K-EXAONE-236B-A23B"), min_transformers_version="5.1.0"
    ),
    "Fairseq2LlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mgleize/fairseq2-dummy-Llama-3.2-1B")),
    "FalconForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-7b")),
    "FalconH1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/Falcon-H1-0.5B-Base")),
    "FlexOlmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/Flex-reddit-2x7B-1T")),
    "GemmaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")),
    "Gemma2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-2-9b"),
        extras={"tiny": os.path.join(models_path_prefix, "google/gemma-2-2b-it")},
    ),
    "Gemma3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-1b-it")),
    "Gemma3nForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")),
    "GlmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/glm-4-9b-chat-hf")),
    "Glm4ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4-9B-0414")),
    "Glm4MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.5")),
    "Glm4MoeLiteForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.7-Flash"),
        min_transformers_version="5.0.0",
    ),
    "GlmMoeDsaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-5"),
        min_transformers_version="5.0.1",
        is_available_online=False,
    ),
    "GPT2LMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "openai-community/gpt2"), {"alias": os.path.join(models_path_prefix, "gpt2")}),
    "GPTBigCodeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder"),
        extras={
            "tiny": os.path.join(models_path_prefix, "bigcode/tiny_starcoder_py"),
            "santacoder": os.path.join(models_path_prefix, "bigcode/gpt_bigcode-santacoder"),
        },
    ),
    "GPTJForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Milos/slovak-gpt-j-405M"), {"6b": os.path.join(models_path_prefix, "EleutherAI/gpt-j-6b")}
    ),
    "GPTNeoXForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "EleutherAI/pythia-70m"), {"1b": os.path.join(models_path_prefix, "EleutherAI/pythia-1.4b")}
    ),
    "GptOssForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "lmsys/gpt-oss-20b-bf16")),
    "GraniteForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm/PowerLM-3b")),
    "GraniteMoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm/PowerMoE-3b")),
    "GraniteMoeHybridForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-4.0-tiny-preview")
    ),
    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-research/moe-7b-1b-active-shared-experts")
    ),
    "Grok1ModelForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hpcai-tech/grok-1"), trust_remote_code=True
    ),
    "Grok1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "xai-org/grok-2"), trust_remote_code=True),
    "HunYuanDenseV1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tencent/Hunyuan-7B-Instruct")),
    "HunYuanMoEV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-A13B-Instruct"), trust_remote_code=True
    ),
    "InternLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm-chat-7b"), trust_remote_code=True
    ),
    "InternLM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-chat-7b"), trust_remote_code=True
    ),
    "InternLM2VEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/Mono-InternVL-2B"), trust_remote_code=True
    ),
    "InternLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm3-8b-instruct"), trust_remote_code=True
    ),
    "JAISLMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "inceptionai/jais-13b-chat")),
    "Jais2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inceptionai/Jais-2-8B-Chat"), min_transformers_version="4.58"
    ),
    "IQuestCoderForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "IQuestLab/IQuest-Coder-V1-40B-Instruct"), trust_remote_code=True
    ),
    "IQuestLoopCoderForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct"), trust_remote_code=True
    ),
    "JambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/AI21-Jamba-1.5-Mini"),
        extras={
            "tiny": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-dev"),
            "random": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-random"),
        },
    ),
    "KimiLinearForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-Linear-48B-A3B-Instruct"), trust_remote_code=True
    ),
    "Lfm2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "LiquidAI/LFM2-1.2B")),
    "Lfm2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-8B-A1B"), min_transformers_version="4.58"
    ),
    "LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.2-1B-Instruct"),
        extras={
            "guard": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-3-1B"),
            "hermes": os.path.join(models_path_prefix, "NousResearch/Hermes-3-Llama-3.1-8B"),
            "fp8": os.path.join(models_path_prefix, "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"),
            "tiny": os.path.join(models_path_prefix, "hmellor/tiny-random-LlamaForCausalLM"),
        },
    ),
    "LLaMAForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "decapoda-research/llama-7b-hf"), is_available_online=False
    ),
    "Llama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "LongcatFlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"), trust_remote_code=True
    ),
    "MambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "state-spaces/mamba-130m-hf")),
    "Mamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mamba-Codestral-7B-v0.1"),
        extras={
            "random": os.path.join(models_path_prefix, "yujiepan/mamba2-codestral-v0.1-tiny-random"),
        },
    ),
    "FalconMambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-mamba-7b-instruct")),
    "MiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"), trust_remote_code=True
    ),
    "MiniCPM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM3-4B"), trust_remote_code=True
    ),
    "MiniCPM4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM4.1-8B"), trust_remote_code=True
    ),
    "MiniMaxForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01-hf")),
    "MiniMaxText01ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01"),
        trust_remote_code=True,
        revision="a59aa9cbc53b9fb8742ca4e9e1531b9802b6fdc3",
    ),
    "MiniMaxM1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M1-40k"), trust_remote_code=True
    ),
    "MiniMaxM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M2"),
        trust_remote_code=True,
    ),
    "MistralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mistral-7B-Instruct-v0.1")),
    "MistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4")
    ),
    "MixtralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1"),
        {"tiny": os.path.join(models_path_prefix, "TitanML/tiny-mixtral")},
    ),
    "MptForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mpt"), is_available_online=False),
    # FIXME: mosaicml/mpt-7b has been deleted
    "MPTForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mosaicml/mpt-7b"), is_available_online=False),
    "NemotronForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/Minitron-8B-Base")),
    "NemotronHForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Nemotron-H-8B-Base-8K"), trust_remote_code=True
    ),
    "OlmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-1B-hf")),
    "Olmo2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-2-0425-1B")),
    "Olmo3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/Olmo-3-7B-Instruct")),
    "OlmoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924-Instruct")),
    "OpenPanguMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "OPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/opt-125m"), {"1b": os.path.join(models_path_prefix, "facebook/opt-iml-max-1.3b")}
    ),
    "OrionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OrionStarAI/Orion-14B-Chat"), trust_remote_code=True
    ),
    "OuroForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ByteDance/Ouro-1.4B"), trust_remote_code=True),
    "PanguEmbeddedForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Embedded-7B-V1.1"), trust_remote_code=True
    ),
    "PanguProMoEV2ForCausalLM": _HfExamplesInfo(
        "",
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PanguUltraMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PersimmonForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/persimmon-8b-chat")),
    "PhiForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/phi-2")),
    "Phi3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-mini-4k-instruct")),
    "PhiMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3.5-MoE-instruct"), trust_remote_code=True
    ),
    "Plamo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-2-1b"),
        trust_remote_code=True,
    ),
    "Plamo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-3-nict-2b-base"),
        trust_remote_code=True,
    ),
    "QWenLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
        trust_remote_code=True,
    ),
    "Qwen2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-0.5B-Instruct"),
        extras={
            "2.5": os.path.join(models_path_prefix, "Qwen/Qwen2.5-0.5B-Instruct"),
            "2.5-1.5B": os.path.join(models_path_prefix, "Qwen/Qwen2.5-1.5B-Instruct"),
        },
    ),
    "Qwen2MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen1.5-MoE-A2.7B-Chat")),
    "Qwen3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen3-8B")),
    "Qwen3MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B")),
    "Qwen3NextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        extras={"tiny-random": os.path.join(models_path_prefix, "tiny-random/qwen3-next-moe")},
        min_transformers_version="4.56.3",
    ),
    "RWForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-40b")),
    "SeedOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance-Seed/Seed-OSS-36B-Instruct"),
        trust_remote_code=True,
    ),
    "Step1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/Step-Audio-EditX"), trust_remote_code=True
    ),
    "Step3p5ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step-3.5-flash"), is_available_online=False
    ),
    "SmolLM3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceTB/SmolLM3-3B")),
    "StableLMEpochForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-zephyr-3b")),
    "StableLmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-3b-4e1t")),
    "Starcoder2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "bigcode/starcoder2-3b")),
    "Step3TextForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stepfun-ai/step3"), trust_remote_code=True),
    "SolarForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "upstage/solar-pro-preview-instruct"), trust_remote_code=True
    ),
    "TeleChatForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "chuhac/TeleChat2-35B"), trust_remote_code=True
    ),
    "TeleChat2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Tele-AI/TeleChat2-3B"), trust_remote_code=True
    ),
    "TeleFLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CofeAI/FLM-2-52B-Instruct-2407"), trust_remote_code=True
    ),
    "XverseForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xverse/XVERSE-7B-Chat"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-2-7b"),
        trust_remote_code=True,
    ),
    "Zamba2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Zyphra/Zamba2-7B-instruct")),
    "MiMoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"), trust_remote_code=True),
    "MiMoV2FlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-V2-Flash"), trust_remote_code=True
    ),
    "Dots1ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "rednote-hilab/dots.llm1.inst")),
}

# Example checkpoints for embedding / pooling architectures, keyed by
# architecture name. Model names are joined onto `models_path_prefix`.
_EMBEDDING_EXAMPLE_MODELS = {
    # [Text-only]
    "BertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5"),
    ),
    "BgeM3EmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-m3"),
    ),
    "Gemma2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-multilingual-gemma2"),
    ),
    "Gemma3TextModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/embeddinggemma-300m"),
    ),
    "GritLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "parasail-ai/GritLM-7B-vllm"),
    ),
    "GteModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-v2.0"),
        trust_remote_code=True,
    ),
    "GteNewModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-base-en-v1.5"),
        hf_overrides={"architectures": ["GteNewModel"]},
        trust_remote_code=True,
    ),
    "InternLM2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-1_8b-reward"),
        trust_remote_code=True,
    ),
    "JambaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-reward-dev"),
    ),
    "LlamaModel": _HfExamplesInfo(
        "llama",
        is_available_online=False,
    ),
    "LlamaBidirectionalModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/llama-nemotron-embed-1b-v2"),
        trust_remote_code=True,
    ),
    "MistralModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"),
    ),
    "ModernBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-modernbert-base"),
        trust_remote_code=True,
    ),
    "NomicBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nomic-ai/nomic-embed-text-v2-moe"),
        trust_remote_code=True,
    ),
    "Qwen2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ssmits/Qwen2-7B-Instruct-embed-base"),
    ),
    "Qwen2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-RM-72B"),
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
        max_transformers_version="4.53",
    ),
    "Qwen2ForProcessRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-PRM-7B"),
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
        max_transformers_version="4.53",
    ),
    "RobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/stsb-roberta-base-v2"),
    ),
    "RobertaForMaskedLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/all-roberta-large-v1"),
    ),
    "XLMRobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/multilingual-e5-small"),
    ),
    "BertSpladeSparseEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver/splade-v3"),
        hf_overrides={"architectures": ["BertSpladeSparseEmbeddingModel"]},
    ),
    # [Multimodal]
    "CLIPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/clip-vit-base-patch32"),
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "royokong/e5-v"),
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full"),
        trust_remote_code=True,
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MrLight/dse-qwen2-2b-mrl-v1"),
    ),
    "SiglipModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/siglip-base-patch16-224"),
    ),
    "PrithviGeoSpatialMAE": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        require_embed_inputs=True,
        enforce_eager=True,
        dtype="float16",
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
    "Terratorch": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        require_embed_inputs=True,
        enforce_eager=True,
        dtype="float16",
        # This is to avoid the model going OOM in CI
        max_num_seqs=32,
    ),
}

# Example checkpoints for sequence/token classification architectures,
# keyed by architecture name.
# NOTE(review): "LlamaBidirectionalForSequenceClassification" uses a bare Hub
# ID instead of a models_path_prefix-joined path like its siblings; confirm
# whether that is intentional.
_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "GPT2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nie3e/sentiment-polish-gpt2-small")
    ),
    # [Cross-encoder]
    "BertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/ms-marco-MiniLM-L-6-v2")
    ),
    "BertForTokenClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "boltuix/NeuroBERT-NER")),
    "GteNewForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-multilingual-reranker-base"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
    ),
    "LlamaBidirectionalForSequenceClassification": _HfExamplesInfo(
        "nvidia/llama-nemotron-rerank-1b-v2", trust_remote_code=True
    ),
    "ModernBertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-reranker-modernbert-base")
    ),
    "ModernBertForTokenClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "disham993/electrical-ner-ModernBERT-base")
    ),
    "RobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/quora-roberta-base")
    ),
    "XLMRobertaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-m3")),
}

# Example checkpoints for architectures that are converted automatically,
# keyed by architecture name.
# NOTE(review): "Qwen3ForTokenClassification" and
# "Qwen3VLForSequenceClassification" use bare Hub IDs instead of
# models_path_prefix-joined paths; confirm whether that is intentional.
_AUTOMATIC_CONVERTED_MODELS = {
    # Use as_seq_cls_model for automatic conversion
    "GemmaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-gemma"),
        hf_overrides={
            "architectures": ["GemmaForSequenceClassification"],
            "classifier_from_token": ["Yes"],
            "method": "no_post_processing",
        },
    ),
    "LlamaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-Reward-V2-Llama-3.2-1B")
    ),
    "Qwen2ForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "jason9693/Qwen2.5-1.5B-apeach")),
    "Qwen3ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tomaarsen/Qwen3-Reranker-0.6B-seq-cls")
    ),
    "Qwen3ForTokenClassification": _HfExamplesInfo("bd2lcco/Qwen3-0.6B-finetuned"),
    "Qwen3VLForSequenceClassification": _HfExamplesInfo(
        "Qwen/Qwen3-VL-Reranker-2B",
        is_available_online=False,
        hf_overrides={
            "architectures": ["Qwen3VLForSequenceClassification"],
            "classifier_from_token": ["no", "yes"],
            "is_original_qwen3_reranker": True,
        },
    ),
}

# Example checkpoints for multimodal architectures, keyed by architecture name.
# Model locations are joined with models_path_prefix; values that are not
# filesystem locations (architecture names, tokenizer modes, ...) must stay
# plain strings.
# NOTE(review): several entries (e.g. "GraniteVision",
# "IsaacForConditionalGeneration", "KimiK25ForConditionalGeneration",
# "Lfm2VlForConditionalGeneration", "StepVLForConditionalGeneration",
# "VoxtralForConditionalGeneration", "NemotronParseForConditionalGeneration")
# and the "olmo" extra of "Molmo2ForConditionalGeneration" still use bare Hub
# IDs; confirm whether that is intentional.
_MULTIMODAL_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AriaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "rhymes-ai/Aria")),
    "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/audio-flamingo-3-hf"), min_transformers_version="5.0.0"
    ),
    "AyaVisionForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "CohereLabs/aya-vision-8b")),
    "BagelForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "ByteDance-Seed/BAGEL-7B-MoT")),
    "BeeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Open-Bee/Bee-8B-RL"),
        trust_remote_code=True,
    ),
    "Blip2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Salesforce/blip2-opt-2.7b"),
        extras={"6b": os.path.join(models_path_prefix, "Salesforce/blip2-opt-6.7b")},
    ),
    "ChameleonForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/chameleon-7b")),
    "Cohere2VisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/command-a-vision-07-2025")
    ),
    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-vl2-tiny"),
        extras={"fork": os.path.join(models_path_prefix, "Isotr0py/deepseek-vl2-tiny")},
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
        hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
    ),
    "DeepseekOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-OCR"),
    ),
    "DotsOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.ocr"), trust_remote_code=True
    ),
    "Eagle2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Eagle2.5-8B"), trust_remote_code=True, is_available_online=False
    ),
    "Emu3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")),
    "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-VL-28B-A3B-PT"),
        trust_remote_code=True,
    ),
    "FuyuForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/fuyu-8b")),
    "Gemma3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-4b-it")),
    "Gemma3nForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")),
    "GlmAsrForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-ASR-Nano-2512"),
        trust_remote_code=True,
        min_transformers_version="5.0.0",
    ),
    "GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"),
    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-speech-3.3-2b")
    ),
    "GLM4VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4v-9b"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GLM4VForCausalLM"]},
    ),
    "Glm4vForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.1V-9B-Thinking")),
    "Glm4vMoeForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.5V")),
    "GlmOcrForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-OCR"),
        is_available_online=False,
        min_transformers_version="5.1.0",
    ),
    "H2OVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-800m"),
        trust_remote_code=True,
        extras={"2b": os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-2b")},
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
    ),
    "HCXVisionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"),
        trust_remote_code=True,
    ),
    "HunYuanVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/HunyuanOCR"),
        hf_overrides={"num_experts": 0},
    ),
    "Idefics3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3"),
        extras={"tiny": os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM-256M-Instruct")},
    ),
    "IsaacForConditionalGeneration": _HfExamplesInfo(
        "PerceptronAI/Isaac-0.1",
        trust_remote_code=True,
        extras={"0.2-2B-Preview": "PerceptronAI/Isaac-0.2-2B-Preview"},
    ),
    "InternS1ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/Intern-S1"), trust_remote_code=True
    ),
    "InternVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
        extras={
            "2B": os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
            "3.0": os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B"),
            "3.5-qwen3": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-1B"),
            "3.5-qwen3moe": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-30B-A3B"),
            "3.5-gptoss": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"),
        },
        trust_remote_code=True,
    ),
    "InternVLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B-hf")),
    "KananaVForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "kakaocorp/kanana-1.5-v-3b-instruct"),
        trust_remote_code=True,
    ),
    "KeyeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-8B-Preview"),
        trust_remote_code=True,
    ),
    "KeyeVL1_5ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-1_5-8B"),
        trust_remote_code=True,
    ),
    "KimiVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Instruct"),
        extras={"thinking": os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Thinking")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason={
            "hf": (
                "HF model uses deprecated transformers API "
                "(PytorchGELUTanh, DynamicCache.seen_tokens, and more). See: "
                "https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/31"
            )
        },
    ),
    "KimiK25ForConditionalGeneration": _HfExamplesInfo(
        "moonshotai/Kimi-K2.5",
        trust_remote_code=True,
        is_available_online=False,
    ),
    "LightOnOCRForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lightonai/LightOnOCR-1B-1025")
    ),
    "Lfm2VlForConditionalGeneration": _HfExamplesInfo(
        "LiquidAI/LFM2-VL-450M",
        min_transformers_version="5.0.0",
    ),
    "Llama4ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
        max_model_len=10240,
        extras={"llama-guard-4": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-4-12B")},
    ),
    "LlavaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
        extras={
            "mistral": os.path.join(models_path_prefix, "mistral-community/pixtral-12b"),
            "mistral-fp8": os.path.join(models_path_prefix, "nm-testing/pixtral-12b-FP8-dynamic"),
        },
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")
    ),
    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")
    ),
    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    ),
    "MantisForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/Mantis-8B-siglip-llama3"),
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
        hf_overrides={"architectures": ["MantisForConditionalGeneration"]},
    ),
    "MiDashengLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mispeech/midashenglm-7b"), trust_remote_code=True
    ),
    "MiniCPMO": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM-o-2_6"), trust_remote_code=True),
    "MiniCPMV": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5"),
        extras={
            "2.6": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-2_6"),
            "4.0": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4"),
            "4.5": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4_5"),
        },
        trust_remote_code=True,
    ),
    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-VL-01"),
        trust_remote_code=True,
    ),
    "Mistral3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Small-3.1-24B-Instruct-2503"),
        extras={"fp8": os.path.join(models_path_prefix, "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic")},
    ),
    "MolmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo-7B-D-0924"),
        max_transformers_version="4.48",
        transformers_version_reason={
            "vllm": "Incorrectly-detected `tensorflow` import from processor."
        },
        extras={"olmo": os.path.join(models_path_prefix, "allenai/Molmo-7B-O-0924")},
        trust_remote_code=True,
    ),
    "Molmo2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo2-8B"),
        extras={"olmo": "allenai/Molmo2-O-7B"},
        min_transformers_version="4.51",
        trust_remote_code=True,
        # required by current PrefixLM implementation
        max_num_batched_tokens=31872,
    ),
    "NVLM_D": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/NVLM-D-72B"), trust_remote_code=True),
    "Llama_Nemotron_Nano_VL": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1"),
        trust_remote_code=True,
    ),
    "NemotronH_Nano_VL_V2": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nano_vl_dummy"), is_available_online=False, trust_remote_code=True
    ),
    "OpenCUAForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xlangai/OpenCUA-7B"), trust_remote_code=True
    ),
    "Ovis": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2-1B"),
        trust_remote_code=True,
        max_transformers_version="4.53",
        transformers_version_reason={"hf": "HF model is not compatible"},
        extras={
            "1.6-llama": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Llama3.2-3B"),
            "1.6-gemma": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Gemma2-9B"),
        },
    ),
    "Ovis2_5": _HfExamplesInfo(os.path.join(models_path_prefix, "AIDC-AI/Ovis2.5-2B"), trust_remote_code=True),
    "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "PaddlePaddle/PaddleOCR-VL"),
        trust_remote_code=True,
    ),
    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/paligemma-3b-mix-224"),
        extras={"v2": os.path.join(models_path_prefix, "google/paligemma2-3b-ft-docci-448")},
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-vision-128k-instruct"),
        trust_remote_code=True,
        max_transformers_version="4.48",
        transformers_version_reason={
            "hf": "HF model use deprecated imports which have been removed."
        },  # noqa: E501
        extras={"phi3.5": os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")},
    ),
    "Phi4MMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct"), trust_remote_code=True
    ),
    "PixtralForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409"),
        extras={
            "mistral-large-3": os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"),
            "ministral-3": os.path.join(models_path_prefix, "mistralai/Ministral-3-3B-Instruct-2512"),
        },
        tokenizer_mode="mistral",
    ),
    "QwenVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-VL"),
        extras={"chat": os.path.join(models_path_prefix, "Qwen/Qwen-VL-Chat")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason={
            "hf": "HF model uses deprecated imports which have been removed."
        },  # noqa: E501
        hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
    ),
    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-Audio-7B-Instruct")
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")),
    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-3B-Instruct"),
        max_model_len=4096,
    ),
    "Qwen2_5OmniModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-3B")),
    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")),
    "Qwen3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-4B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3_5ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3.5-9B-Instruct"),
        max_model_len=4096,
        min_transformers_version="5.1.0",
    ),
    "Qwen3_5MoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3.5-35B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="5.1.0",
    ),
    "Qwen3_5MTP": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3.5-9B-Instruct"),
        # The draft is the same checkpoint as the target, so it must resolve
        # to the same (prefixed) location.
        speculative_model=os.path.join(models_path_prefix, "Qwen/Qwen3.5-9B-Instruct"),
        min_transformers_version="5.1.0",
    ),
    "Qwen3_5MoeMTP": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3.5-35B-A3B-Instruct"),
        # Same checkpoint as the target; keep the location consistent.
        speculative_model=os.path.join(models_path_prefix, "Qwen/Qwen3.5-35B-A3B-Instruct"),
        min_transformers_version="5.1.0",
    ),
    "Qwen3OmniMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Omni-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "RForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "YannQi/R-4B"), trust_remote_code=True),
    "SkyworkR1VChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B"), trust_remote_code=True
    ),
    "SmolVLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    ),
    "Step3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"), trust_remote_code=True
    ),
    "StepVLForConditionalGeneration": _HfExamplesInfo(
        "stepfun-ai/Step3-VL-10B", trust_remote_code=True
    ),
    "UltravoxModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),
        trust_remote_code=True,
    ),
    "TarsierForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "omni-research/Tarsier-7b")),
    "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier2-Recap-7b"),
        hf_overrides={
            # An architecture name is a class identifier, not a filesystem
            # path, so it must not be joined with models_path_prefix.
            "architectures": ["Tarsier2ForConditionalGeneration"],
            "model_type": "tarsier2",
        },
    ),
    "VoxtralForConditionalGeneration": _HfExamplesInfo(
        "mistralai/Voxtral-Mini-3B-2507",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    "VoxtralStreamingGeneration": _HfExamplesInfo(
        "<place-holder>",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    # [Encoder-decoder]
    "NemotronParseForConditionalGeneration": _HfExamplesInfo(
        "nvidia/NVIDIA-Nemotron-Parse-v1.1", trust_remote_code=True
    ),
    "WhisperForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/whisper-large-v3-turbo"),
        extras={"v3": os.path.join(models_path_prefix, "openai/whisper-large-v3")},
    ),
    # [Cross-encoder]
    "JinaVLForRanking": _HfExamplesInfo(os.path.join(models_path_prefix, "jinaai/jina-reranker-m0")),
}

# Example target/draft checkpoint pairs for speculative decoding, keyed by
# architecture name. Only model and tokenizer locations are joined with
# models_path_prefix; `speculative_method` is a method name, not a path.
# NOTE(review): "ExaoneMoeMTP", "Glm4MoeLiteMTPModel", "GlmOcrMTPModel" and
# "Step3p5MTP" use bare Hub IDs for both target and draft; confirm whether
# that is intentional.
_SPECULATIVE_DECODING_EXAMPLE_MODELS = {
    "MedusaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-68m"),
        speculative_model=os.path.join(models_path_prefix, "abhigoyal/vllm-medusa-llama-68m-random"),
    ),
    # Temporarily disabled.
    # TODO(woosuk): Re-enable this once the MLP Speculator is supported in V1.
    # "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(
    #     "JackFram/llama-160m",
    #     speculative_model="ibm-ai-platform/llama-160m-accelerator"
    # ),
    "DeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "luccafong/deepseek_mtp_main_random"),
        speculative_model=os.path.join(models_path_prefix, "luccafong/deepseek_mtp_draft_random"),
        trust_remote_code=True,
    ),
    "EagleDeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "eagle618/deepseek-v3-random"),
        speculative_model=os.path.join(models_path_prefix, "eagle618/eagle-deepseek-v3-random"),
        trust_remote_code=True,
    ),
    "EagleLlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
    ),
    "Eagle3LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        use_original_num_layers=True,
        max_model_len=10240,
    ),
    "EagleMistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512"),
        speculative_model=os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-Eagle"),
        # TODO: revert once figuring out OOM in CI
        is_available_online=False,
    ),
    "LlamaForCausalLMEagle3": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "AngelSlim/Qwen3-8B_eagle3"),
        tokenizer=os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        use_original_num_layers=True,
    ),
    "EagleLlama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "EagleMiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-1B-sft-bf16"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
        # The method is an identifier, not a path: it must stay the bare name.
        speculative_method="eagle",
        tokenizer=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
    ),
    "ErnieMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
    ),
    "ExaoneMoeMTP": _HfExamplesInfo(
        "LGAI-EXAONE/K-EXAONE-236B-A23B",
        speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B",
        min_transformers_version="5.1.0",
    ),
    "Glm4MoeMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
        # Same checkpoint as the target; keep the location consistent.
        speculative_model=os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
    ),
    "Glm4MoeLiteMTPModel": _HfExamplesInfo(
        "zai-org/GLM-4.7-Flash",
        speculative_model="zai-org/GLM-4.7-Flash",
        min_transformers_version="5.0.0",
    ),
    "GlmOcrMTPModel": _HfExamplesInfo(
        "zai-org/GLM-OCR",
        speculative_model="zai-org/GLM-OCR",
        is_available_online=False,
        min_transformers_version="5.1.0",
    ),
    "LongCatFlashMTPModel": _HfExamplesInfo(
        # Join the prefix exactly once; a nested join doubles a relative prefix.
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
    ),
    "MiMoMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
    ),
    "Eagle3Qwen2_5vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-7B-Instruct"),
        speculative_model=os.path.join(models_path_prefix, "Rayzl/qwen2.5-vl-7b-eagle3-sgl"),
    ),
    "Eagle3Qwen3vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-8B-Instruct"),
        speculative_model=os.path.join(models_path_prefix, "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3"),
    ),
    "Qwen3NextMTP": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"), min_transformers_version="4.56.3"
    ),
    "Step3p5MTP": _HfExamplesInfo(
        "stepfun-ai/Step-3.5-Flash",
        trust_remote_code=True,
        speculative_model="stepfun-ai/Step-3.5-Flash",
        is_available_online=False,
    ),
}

# Example checkpoints for the generic "Transformers*" architectures, keyed by
# architecture name.
_TRANSFORMERS_BACKEND_MODELS = {
    "TransformersEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5"), min_transformers_version="5.0.0"
    ),
    "TransformersForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "papluca/xlm-roberta-base-language-detection"),
        min_transformers_version="5.0.0",
    ),
    "TransformersForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hmellor/Ilama-3.2-1B"), trust_remote_code=True
    ),
    "TransformersMultiModalForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")),
    "TransformersMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924"), min_transformers_version="5.0.0"
    ),
    "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"), min_transformers_version="5.0.0"
    ),
    "TransformersMoEEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"), min_transformers_version="5.0.0"
    ),
    "TransformersMoEForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"), min_transformers_version="5.0.0"
    ),
    "TransformersMultiModalEmbeddingModel": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-4b-it")),
    "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it")
    ),
}

# Union of the per-category registries. When an architecture name appears in
# more than one category, the entry from the mapping unpacked later wins
# (e.g. the multimodal "LlavaNextForConditionalGeneration" entry replaces the
# embedding one).
_EXAMPLE_MODELS = {
    **_TEXT_GENERATION_EXAMPLE_MODELS,
    **_EMBEDDING_EXAMPLE_MODELS,
    **_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS,
    **_MULTIMODAL_EXAMPLE_MODELS,
    **_SPECULATIVE_DECODING_EXAMPLE_MODELS,
    **_TRANSFORMERS_BACKEND_MODELS,
}


class HfExampleModels:
    """Read-only lookup helper over a mapping of architecture name to
    example-model info."""

    def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None:
        super().__init__()

        self.hf_models = hf_models

    def get_supported_archs(self) -> Set[str]:
        """Return the architecture names, i.e. the keys of the wrapped mapping."""
        return self.hf_models.keys()

    def get_hf_info(self, model_arch: str) -> _HfExamplesInfo:
        """Return the example info registered for `model_arch`.

        Raises:
            ValueError: If `model_arch` is not a key of the wrapped mapping.
        """
        try:
            return self.hf_models[model_arch]
        except KeyError:
            # `from None` hides the internal KeyError from the traceback.
            raise ValueError(
                f"No example model defined for {model_arch}; please update this file."
            ) from None

    def find_hf_info(self, model_id: str) -> _HfExamplesInfo:
        """Return the first example info that references `model_id`.

        Every entry's `default` is checked before any entry's `extras`, so a
        default match always takes precedence over an extras match.

        Raises:
            ValueError: If no entry lists `model_id` as default or extra.
        """
        for info in self.hf_models.values():
            if info.default == model_id:
                return info

        # Fallback to extras
        for info in self.hf_models.values():
            if model_id in info.extras.values():
                return info

        raise ValueError(
            f"No example model defined for {model_id}; please update this file."
        )


# Lookup over every registered example model.
HF_EXAMPLE_MODELS = HfExampleModels(_EXAMPLE_MODELS)
# Lookup restricted to the automatically converted models.
AUTO_EXAMPLE_MODELS = HfExampleModels(_AUTOMATIC_CONVERTED_MODELS)