registry.py 54.4 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
from collections.abc import Mapping, Set
5
from dataclasses import dataclass, field
6
from typing import Any, Literal
7

zhuwenwen's avatar
zhuwenwen committed
8
import os
9
10
11
import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
zhuwenwen's avatar
zhuwenwen committed
12
# from ..utils import models_path_prefix
13

zhuwenwen's avatar
zhuwenwen committed
14
# Root directory that holds locally mirrored model checkpoints.
# `VLLM_OPTEST_MODELS_PATH` takes precedence over `OPTEST_MODELS_PATH`.
# Fall back to an empty string when neither is set: `os.path.join("", name)`
# returns `name` unchanged, so the registry below resolves to plain model ids
# instead of raising `TypeError` on `os.path.join(None, ...)` at import time.
models_path_prefix = (
    os.getenv('VLLM_OPTEST_MODELS_PATH') or os.getenv("OPTEST_MODELS_PATH") or ""
)
15

16
from vllm.config.model import ModelDType, TokenizerMode
17

zhuwenwen's avatar
zhuwenwen committed
18

19
20
21
22
23
24
25
26
@dataclass(frozen=True)
class _HfExamplesInfo:
    """Example checkpoint and loading options used to test one architecture."""

    default: str
    """Model that is used by default when testing this architecture."""

    extras: Mapping[str, str] = field(default_factory=dict)
    """Additional models, keyed by a short label, for testing this architecture."""

    tokenizer: str | None = None
    """Tokenizer to load for this architecture."""

    tokenizer_mode: TokenizerMode | str = "auto"
    """Tokenizer type to use for this architecture."""

    speculative_model: str | None = None
    """
    Default model for testing this architecture; only used for speculative
    decoding.
    """

    speculative_method: str | None = None
    """
    Speculative decoding method to use.
    """

    min_transformers_version: str | None = None
    """
    Lowest HF Transformers version that can run this model.
    """

    max_transformers_version: str | None = None
    """
    Highest HF Transformers version that this model runs on.
    """

    transformers_version_reason: dict[Literal["vllm", "hf"], str] | None = None
    """
    Kind of skip and its reason for the minimum/maximum version requirement.
    vllm: skip all vLLM tests when the version requirement is not met.
    hf: only skip tests that use the HF runner when the requirement is not met.
    """

    require_embed_inputs: bool = False
    """
    When `True`, prompt and multi-modal embedding inputs are enabled and
    tokenization is disabled.
    """

    dtype: ModelDType = "auto"
    """
    Data type of the model weights and activations.
    """

    enforce_eager: bool = False
    """
    Whether eager execution is enforced. If True, CUDA graph is disabled and
    the model always runs in eager mode. If False, CUDA graph and eager
    execution are used in hybrid.
    """

    is_available_online: bool = True
    """
    Set to `False` when the name of this architecture no longer exists on the
    HF repo. Such entries are kept in the main model registry for backwards
    compatibility, so the registry tests would fail without this flag.
    """

    trust_remote_code: bool = False
    """The `trust_remote_code` level needed to load the model."""

    hf_overrides: dict[str, Any] = field(default_factory=dict)
    """The `hf_overrides` needed to load the model."""

    max_model_len: int | None = None
    """
    Maximum model length to use for this model. Some models default to a
    length that does not fit into memory in CI.
    """

    revision: str | None = None
    """
    Specific revision (commit hash, tag, or branch) of the model to use.
    The default revision is used when this is not given.
    """

    max_num_seqs: int | None = None
    """Maximum number of sequences processed in a single iteration."""

    use_original_num_layers: bool = False
    """
    If True, the original number of layers from the model config is used
    instead of minimal layers for testing.
    """

    def check_transformers_version(
        self,
        *,
        on_fail: Literal["error", "skip", "return"],
        check_version_reason: Literal["vllm", "hf"] = "hf",
        check_min_version: bool = True,
        check_max_version: bool = True,
    ) -> str | None:
        """
        Compare the installed transformers version against this entry's
        requirements and act on a mismatch.

        Returns `None` when no bound is configured or the check passes.
        Otherwise `on_fail="error"` raises `RuntimeError`, `on_fail="skip"`
        calls `pytest.skip`, and any other value returns the message.
        """
        lower_bound = self.min_transformers_version
        upper_bound = self.max_transformers_version
        if lower_bound is None and upper_bound is None:
            # Nothing to verify for this model.
            return None

        installed = TRANSFORMERS_VERSION
        # Compare on the base version only; otherwise preview models cannot
        # run because `x.yy.0.dev0` sorts below `x.yy.0`.
        installed_base = Version(Version(installed).base_version)
        msg = f"`transformers=={installed}` installed, but `transformers"

        if lower_bound and installed_base < Version(lower_bound):
            # Too old: only acceptable when the caller disabled the min check.
            version_ok = not check_min_version
            msg += f">={lower_bound}` is required to run this model."
        elif upper_bound and installed_base > Version(upper_bound):
            # Too new: only acceptable when the caller disabled the max check.
            version_ok = not check_max_version
            msg += f"<={upper_bound}` is required to run this model."
        else:
            version_ok = True

        # A recorded reason for the runner being checked marks that runner as
        # incompatible, so the test must not proceed for it.
        reasons = self.transformers_version_reason
        reason_applies = bool(
            check_version_reason
            and reasons
            and check_version_reason in reasons
        )

        if version_ok and not reason_applies:
            return None

        if reasons:
            msg += "".join(
                f" Reason({kind}): {text}" for kind, text in reasons.items()
            )

        if on_fail == "error":
            raise RuntimeError(msg)
        if on_fail == "skip":
            pytest.skip(msg)

        return msg

    def check_available_online(
        self,
        *,
        on_fail: Literal["error", "skip"],
    ) -> None:
        """
        Act on a model that is flagged as not available online: raise
        `RuntimeError` for `on_fail="error"`, otherwise call `pytest.skip`.
        """
        if self.is_available_online:
            return

        msg = "Model is not available online"
        if on_fail == "error":
            raise RuntimeError(msg)
        pytest.skip(msg)

185
186
187

# Maps each text-generation architecture name to the example checkpoint used
# to test it. Every checkpoint path is resolved below `models_path_prefix`.
# Keys must be unique: a repeated key in a dict display silently overrides the
# earlier entry.
_TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AfmoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "arcee-ai/Trinity-Nano-Preview")
    ),
    "ApertusForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "swiss-ai/Apertus-8B-Instruct-2509")
    ),
    "AquilaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/AquilaChat-7B"),
        trust_remote_code=True,
    ),
    "AquilaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/AquilaChat2-7B"),
        trust_remote_code=True,
    ),
    "ArceeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "arcee-ai/AFM-4.5B-Base")
    ),
    "ArcticForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-instruct"),
        trust_remote_code=True,
    ),
    "BaiChuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B"),
        trust_remote_code=True,
    ),
    "BaichuanForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baichuan-inc/Baichuan2-7B-chat"),
        trust_remote_code=True,
    ),
    "BailingMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-lite-1.5"),
        trust_remote_code=True,
    ),
    "BailingMoeV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inclusionAI/Ling-mini-2.0"),
        trust_remote_code=True,
    ),
    "BambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-ai-platform/Bamba-9B-v1"),
        extras={
            "tiny": os.path.join(
                models_path_prefix, "hmellor/tiny-random-BambaForCausalLM"
            ),
        },
    ),
    # The default checkpoint is resolved below the prefix like its extras.
    "BloomForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigscience/bloom-560m"),
        extras={"1b": os.path.join(models_path_prefix, "bigscience/bloomz-1b1")},
    ),
    "ChatGLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/chatglm3-6b"),
        trust_remote_code=True,
        max_transformers_version="4.48",
    ),
    "ChatGLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "thu-coai/ShieldLM-6B-chatglm3"),
        trust_remote_code=True,
    ),
    "CohereForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r-v01"),
        trust_remote_code=True,
    ),
    "Cohere2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/c4ai-command-r7b-12-2024"),
        trust_remote_code=True,
    ),
    "CwmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/cwm"),
        min_transformers_version="4.58",
    ),
    # FIXME: databricks/dbrx-instruct has been deleted
    "DbrxForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "databricks/dbrx-instruct"),
        is_available_online=False,
    ),
    "DeciLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3_3-Nemotron-Super-49B-v1"),
        trust_remote_code=True,
    ),
    "DeepseekForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-moe-16b-base"),
        trust_remote_code=True,
    ),
    "DeepseekV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V2-Lite-Chat"),
        trust_remote_code=True,
    ),
    "DeepseekV3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3"),
        trust_remote_code=True,
    ),
    "DeepseekV32ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3.2-Exp")
    ),
    "Ernie4_5ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-0.3B-PT")
    ),
    "Ernie4_5_MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT")
    ),
    "ExaoneForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"),
        trust_remote_code=True,
    ),
    "Exaone4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-4.0-32B")
    ),
    "ExaoneMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LGAI-EXAONE/K-EXAONE-236B-A23B"),
        min_transformers_version="5.0.0",
    ),
    "Fairseq2LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mgleize/fairseq2-dummy-Llama-3.2-1B")
    ),
    "FalconForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-7b")
    ),
    "FalconH1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/Falcon-H1-0.5B-Base")
    ),
    "FlexOlmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Flex-reddit-2x7B-1T")
    ),
    "GemmaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
    ),
    "Gemma2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-2-9b"),
        extras={"tiny": os.path.join(models_path_prefix, "google/gemma-2-2b-it")},
    ),
    "Gemma3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-1b-it")
    ),
    "Gemma3nForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")
    ),
    "GlmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4-9b-chat-hf")
    ),
    "Glm4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4-9B-0414")
    ),
    "Glm4MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5")
    ),
    "GPT2LMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai-community/gpt2"),
        extras={"alias": os.path.join(models_path_prefix, "gpt2")},
    ),
    "GPTBigCodeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder"),
        extras={
            "tiny": os.path.join(models_path_prefix, "bigcode/tiny_starcoder_py"),
            "santacoder": os.path.join(
                models_path_prefix, "bigcode/gpt_bigcode-santacoder"
            ),
        },
    ),
    "GPTJForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Milos/slovak-gpt-j-405M"),
        extras={"6b": os.path.join(models_path_prefix, "EleutherAI/gpt-j-6b")},
    ),
    "GPTNeoXForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "EleutherAI/pythia-70m"),
        extras={"1b": os.path.join(models_path_prefix, "EleutherAI/pythia-1.4b")},
    ),
    "GptOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lmsys/gpt-oss-20b-bf16")
    ),
    "GraniteForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm/PowerLM-3b")
    ),
    "GraniteMoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm/PowerMoE-3b")
    ),
    "GraniteMoeHybridForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-4.0-tiny-preview")
    ),
    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "ibm-research/moe-7b-1b-active-shared-experts"
        )
    ),
    "Grok1ModelForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hpcai-tech/grok-1"),
        trust_remote_code=True,
    ),
    "Grok1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xai-org/grok-2"),
        trust_remote_code=True,
    ),
    "HunYuanDenseV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-7B-Instruct")
    ),
    "HunYuanMoEV1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/Hunyuan-A13B-Instruct"),
        trust_remote_code=True,
    ),
    "InternLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm-chat-7b"),
        trust_remote_code=True,
    ),
    "InternLM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-chat-7b"),
        trust_remote_code=True,
    ),
    "InternLM2VEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/Mono-InternVL-2B"),
        trust_remote_code=True,
    ),
    "InternLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm3-8b-instruct"),
        trust_remote_code=True,
    ),
    # These two keys used to be listed a second time after the IQuest entries
    # with identical values; the first occurrence is kept so that the dict's
    # iteration order is unchanged.
    "JAISLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inceptionai/jais-13b-chat")
    ),
    "Jais2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "inceptionai/Jais-2-8B-Chat"),
        min_transformers_version="4.58",
    ),
    "IQuestCoderForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "IQuestLab/IQuest-Coder-V1-40B-Instruct"),
        trust_remote_code=True,
    ),
    "IQuestLoopCoderForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "IQuestLab/IQuest-Coder-V1-40B-Loop-Instruct"
        ),
        trust_remote_code=True,
    ),
    "JambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/AI21-Jamba-1.5-Mini"),
        extras={
            "tiny": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-dev"),
            "random": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-random"),
        },
    ),
    "KimiLinearForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-Linear-48B-A3B-Instruct"),
        trust_remote_code=True,
    ),
    "Lfm2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-1.2B")
    ),
    "Lfm2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "LiquidAI/LFM2-8B-A1B"),
        min_transformers_version="4.58",
    ),
    "LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.2-1B-Instruct"),
        extras={
            "guard": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-3-1B"),
            "hermes": os.path.join(
                models_path_prefix, "NousResearch/Hermes-3-Llama-3.1-8B"
            ),
            "fp8": os.path.join(
                models_path_prefix, "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"
            ),
            "tiny": os.path.join(
                models_path_prefix, "hmellor/tiny-random-LlamaForCausalLM"
            ),
        },
    ),
    "LLaMAForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "decapoda-research/llama-7b-hf"),
        is_available_online=False,
    ),
    "Llama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "LongcatFlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
    ),
    "MambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "state-spaces/mamba-130m-hf")
    ),
    "Mamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mamba-Codestral-7B-v0.1"),
        extras={
            "random": os.path.join(
                models_path_prefix, "yujiepan/mamba2-codestral-v0.1-tiny-random"
            ),
        },
    ),
    "FalconMambaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-mamba-7b-instruct")
    ),
    "MiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
        trust_remote_code=True,
    ),
    "MiniCPM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM3-4B"),
        trust_remote_code=True,
    ),
    "MiniCPM4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM4.1-8B"),
        trust_remote_code=True,
    ),
    "MiniMaxForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01-hf")
    ),
    "MiniMaxText01ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01"),
        trust_remote_code=True,
        revision="a59aa9cbc53b9fb8742ca4e9e1531b9802b6fdc3",
    ),
    "MiniMaxM1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M1-40k"),
        trust_remote_code=True,
    ),
    "MiniMaxM2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-M2"),
        trust_remote_code=True,
    ),
    "MistralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-7B-Instruct-v0.1")
    ),
    "MistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"
        )
    ),
    "MixtralForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1"),
        extras={"tiny": os.path.join(models_path_prefix, "TitanML/tiny-mixtral")},
    ),
    "MptForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mpt"),
        is_available_online=False,
    ),
    # FIXME: mosaicml/mpt-7b has been deleted
    "MPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mosaicml/mpt-7b"),
        is_available_online=False,
    ),
    "NemotronForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Minitron-8B-Base")
    ),
    "NemotronHForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Nemotron-H-8B-Base-8K"),
        trust_remote_code=True,
    ),
    "OlmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMo-1B-hf")
    ),
    "Olmo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMo-2-0425-1B")
    ),
    "Olmo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Olmo-3-7B-Instruct")
    ),
    "OlmoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924-Instruct")
    ),
    "OpenPanguMTPModel": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"
        ),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "OPTForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "facebook/opt-125m"),
        extras={"1b": os.path.join(models_path_prefix, "facebook/opt-iml-max-1.3b")},
    ),
    "OrionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OrionStarAI/Orion-14B-Chat"),
        trust_remote_code=True,
    ),
    "OuroForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance/Ouro-1.4B"),
        trust_remote_code=True,
    ),
    "PanguEmbeddedForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Embedded-7B-V1.1"
        ),
        trust_remote_code=True,
    ),
    "PanguProMoEV2ForCausalLM": _HfExamplesInfo(
        "",
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PanguUltraMoEForCausalLM": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "FreedomIntelligence/openPangu-Ultra-MoE-718B-V1.1"
        ),
        trust_remote_code=True,
        is_available_online=False,
    ),
    "PersimmonForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "adept/persimmon-8b-chat")
    ),
    "PhiForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/phi-2")
    ),
    "Phi3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-mini-4k-instruct")
    ),
    "PhiMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3.5-MoE-instruct"),
        trust_remote_code=True,
    ),
    "Plamo2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-2-1b"),
        trust_remote_code=True,
    ),
    "Plamo3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "pfnet/plamo-3-nict-2b-base"),
        trust_remote_code=True,
    ),
    "QWenLMHeadModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
        trust_remote_code=True,
    ),
    "Qwen2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-0.5B-Instruct"),
        extras={
            "2.5": os.path.join(models_path_prefix, "Qwen/Qwen2.5-0.5B-Instruct"),
            "2.5-1.5B": os.path.join(models_path_prefix, "Qwen/Qwen2.5-1.5B-Instruct"),
        },
    ),
    "Qwen2MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen1.5-MoE-A2.7B-Chat")
    ),
    "Qwen3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B")
    ),
    "Qwen3MoeForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B")
    ),
    "Qwen3NextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"),
        extras={
            "tiny-random": os.path.join(
                models_path_prefix, "tiny-random/qwen3-next-moe"
            ),
        },
        min_transformers_version="4.56.3",
    ),
    "RWForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tiiuae/falcon-40b")
    ),
    "SeedOssForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ByteDance-Seed/Seed-OSS-36B-Instruct"),
        trust_remote_code=True,
    ),
    "SmolLM3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolLM3-3B")
    ),
    "StableLMEpochForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stabilityai/stablelm-zephyr-3b")
    ),
    "StableLmForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stabilityai/stablelm-3b-4e1t")
    ),
    "Starcoder2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "bigcode/starcoder2-3b")
    ),
    "Step3TextForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"),
        trust_remote_code=True,
    ),
    "SolarForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "upstage/solar-pro-preview-instruct"),
        trust_remote_code=True,
    ),
    "TeleChatForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "chuhac/TeleChat2-35B"),
        trust_remote_code=True,
    ),
    "TeleChat2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Tele-AI/TeleChat2-3B"),
        trust_remote_code=True,
    ),
    "TeleFLMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CofeAI/FLM-2-52B-Instruct-2407"),
        trust_remote_code=True,
    ),
    "XverseForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xverse/XVERSE-7B-Chat"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-2-7b"),
        trust_remote_code=True,
    ),
    "Zamba2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Zyphra/Zamba2-7B-instruct")
    ),
    "MiMoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
    ),
    "MiMoV2FlashForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-V2-Flash"),
        trust_remote_code=True,
    ),
    "Dots1ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.llm1.inst")
    ),
}

# Maps each embedding / pooling architecture name to the example checkpoint
# used to test it.
_EMBEDDING_EXAMPLE_MODELS = {
    # [Text-only]
    "BertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5"),
    ),
    "Gemma2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-multilingual-gemma2"),
    ),
    "Gemma3TextModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/embeddinggemma-300m"),
    ),
    "GritLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "parasail-ai/GritLM-7B-vllm"),
    ),
    "GteModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-v2.0"),
        trust_remote_code=True,
    ),
    "GteNewModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-base-en-v1.5"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewModel"]},
    ),
    "InternLM2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/internlm2-1_8b-reward"),
        trust_remote_code=True,
    ),
    "JambaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-reward-dev"),
    ),
    "LlamaModel": _HfExamplesInfo(
        "llama",
        is_available_online=False,
    ),
    "LlamaBidirectionalModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/llama-nemotron-embed-1b-v2"),
        trust_remote_code=True,
    ),
    "MistralModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"),
    ),
    "ModernBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-modernbert-base"),
        trust_remote_code=True,
    ),
    "NomicBertModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nomic-ai/nomic-embed-text-v2-moe"),
        trust_remote_code=True,
    ),
    "Qwen2Model": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ssmits/Qwen2-7B-Instruct-embed-base"),
    ),
    "Qwen2ForRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-RM-72B"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
    ),
    "Qwen2ForProcessRewardModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-PRM-7B"),
        max_transformers_version="4.53",
        transformers_version_reason={
            "hf": "HF model uses remote code that is not compatible with latest Transformers"  # noqa: E501
        },
    ),
    "RobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/stsb-roberta-base-v2"),
    ),
    "RobertaForMaskedLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "sentence-transformers/all-roberta-large-v1"),
    ),
    "XLMRobertaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "intfloat/multilingual-e5-small"),
    ),
    "BertSpladeSparseEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver/splade-v3"),
        hf_overrides={"architectures": ["BertSpladeSparseEmbeddingModel"]},
    ),
    # [Multimodal]
    "CLIPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/clip-vit-base-patch32"),
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "royokong/e5-v"),
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full"),
        trust_remote_code=True,
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MrLight/dse-qwen2-2b-mrl-v1"),
    ),
    "SiglipModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/siglip-base-patch16-224"),
    ),
    "PrithviGeoSpatialMAE": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # Keeps the model from going OOM in CI
        max_num_seqs=32,
    ),
    "Terratorch": _HfExamplesInfo(
        os.path.join(
            models_path_prefix,
            "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",
        ),
        dtype="float16",
        enforce_eager=True,
        require_embed_inputs=True,
        # Keeps the model from going OOM in CI
        max_num_seqs=32,
    ),
}

585
586
# Maps each sequence/token-classification architecture name to the example
# checkpoint used to test it. Every checkpoint path is resolved below
# `models_path_prefix`.
_SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS = {
    # [Decoder-only]
    "GPT2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nie3e/sentiment-polish-gpt2-small")
    ),
    # [Cross-encoder]
    "BertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/ms-marco-MiniLM-L-6-v2")
    ),
    "BertForTokenClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "boltuix/NeuroBERT-NER")
    ),
    "GteNewForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-multilingual-reranker-base"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GteNewForSequenceClassification"]},
    ),
    # Resolved below the prefix like every other entry in this table.
    "LlamaBidirectionalForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/llama-nemotron-rerank-1b-v2"),
        trust_remote_code=True,
    ),
    "ModernBertForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Alibaba-NLP/gte-reranker-modernbert-base")
    ),
    "ModernBertForTokenClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "disham993/electrical-ner-ModernBERT-base")
    ),
    "RobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "cross-encoder/quora-roberta-base")
    ),
    "XLMRobertaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-m3")
    ),
}

615
616
# Example checkpoints for architectures obtained through automatic conversion,
# keyed by architecture name.
_AUTOMATIC_CONVERTED_MODELS = {
    # Use as_seq_cls_model for automatic conversion
    "GemmaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-gemma"),
        hf_overrides={
            "architectures": ["GemmaForSequenceClassification"],
            "classifier_from_token": ["Yes"],
            "method": "no_post_processing",
        },
    ),
    "LlamaForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-Reward-V2-Llama-3.2-1B"),
    ),
    "Qwen2ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "jason9693/Qwen2.5-1.5B-apeach"),
    ),
    "Qwen3ForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"),
    ),
    # NOTE(review): the two entries below use bare hub IDs rather than paths
    # under models_path_prefix -- confirm whether that is intentional.
    "Qwen3ForTokenClassification": _HfExamplesInfo(
        "bd2lcco/Qwen3-0.6B-finetuned",
    ),
    "Qwen3VLForSequenceClassification": _HfExamplesInfo(
        "Qwen/Qwen3-VL-Reranker-2B",
        is_available_online=False,
        hf_overrides={
            "architectures": ["Qwen3VLForSequenceClassification"],
            "classifier_from_token": ["no", "yes"],
            "is_original_qwen3_reranker": True,
        },
    ),
}

644
645
# Example checkpoints for multimodal architectures, keyed by architecture name.
# Model locations are resolved under models_path_prefix; values that are not
# filesystem locations (architecture names, version strings, tokenizer modes)
# must stay as plain strings.
_MULTIMODAL_EXAMPLE_MODELS = {
    # [Decoder-only]
    "AriaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "rhymes-ai/Aria")),
    "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/audio-flamingo-3-hf"), min_transformers_version="5.0.0.dev"
    ),
    "AyaVisionForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "CohereLabs/aya-vision-8b")),
    "BagelForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "ByteDance-Seed/BAGEL-7B-MoT")),
    "BeeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Open-Bee/Bee-8B-RL"),
        trust_remote_code=True,
    ),
    "Blip2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Salesforce/blip2-opt-2.7b"),
        extras={"6b": os.path.join(models_path_prefix, "Salesforce/blip2-opt-6.7b")},
    ),
    "ChameleonForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/chameleon-7b")),
    "Cohere2VisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "CohereLabs/command-a-vision-07-2025")
    ),
    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/deepseek-vl2-tiny"),
        extras={"fork": os.path.join(models_path_prefix, "Isotr0py/deepseek-vl2-tiny")},
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
        hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
    ),
    "DeepseekOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-OCR"),
    ),
    "DotsOCRForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "rednote-hilab/dots.ocr"), trust_remote_code=True
    ),
    "Emu3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf")),
    "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-VL-28B-A3B-PT"),
        trust_remote_code=True,
    ),
    "FuyuForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/fuyu-8b")),
    "Gemma3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-4b-it")),
    "Gemma3nForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3n-E2B-it")),
    "GlmAsrForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-ASR-Nano-2512"),
        trust_remote_code=True,
        min_transformers_version="5.0",
    ),
    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "ibm-granite/granite-speech-3.3-2b")
    ),
    "GLM4VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/glm-4v-9b"),
        trust_remote_code=True,
        hf_overrides={"architectures": ["GLM4VForCausalLM"]},
    ),
    "Glm4vForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.1V-9B-Thinking")),
    "Glm4vMoeForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "zai-org/GLM-4.5V")),
    "H2OVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-800m"),
        trust_remote_code=True,
        extras={"2b": os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-2b")},
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
    ),
    "HCXVisionForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"),
        trust_remote_code=True,
    ),
    "HunYuanVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "tencent/HunyuanOCR"),
        hf_overrides={"num_experts": 0},
    ),
    "Idefics3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3"),
        extras={"tiny": os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM-256M-Instruct")},
    ),
    "IsaacForConditionalGeneration": _HfExamplesInfo(
        "PerceptronAI/Isaac-0.1",
        trust_remote_code=True,
        extras={"0.2-2B-Preview": "PerceptronAI/Isaac-0.2-2B-Preview"},
    ),
    "InternS1ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "internlm/Intern-S1"), trust_remote_code=True
    ),
    "InternVLChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
        extras={
            "2B": os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
            "3.0": os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B"),
            "3.5-qwen3": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-1B"),
            "3.5-qwen3moe": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-30B-A3B"),
            "3.5-gptoss": os.path.join(models_path_prefix, "OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview"),
        },
        trust_remote_code=True,
    ),
    "InternVLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "OpenGVLab/InternVL3-1B-hf")),
    "KananaVForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "kakaocorp/kanana-1.5-v-3b-instruct"),
        trust_remote_code=True,
    ),
    "KeyeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-8B-Preview"),
        trust_remote_code=True,
    ),
    "KeyeVL1_5ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Kwai-Keye/Keye-VL-1_5-8B"),
        trust_remote_code=True,
    ),
    "KimiVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Instruct"),
        extras={"thinking": os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Thinking")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason={
            "hf": (
                "HF model uses deprecated transformers API "
                "(PytorchGELUTanh, DynamicCache.seen_tokens, and more). See: "
                "https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/31"
            )
        },
    ),
    "LightOnOCRForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "lightonai/LightOnOCR-1B-1025")
    ),
    "Lfm2VlForConditionalGeneration": _HfExamplesInfo(
        "LiquidAI/LFM2-VL-450M",
        min_transformers_version="5.0.0",
    ),
    "Llama4ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
        max_model_len=10240,
        extras={"llama-guard-4": os.path.join(models_path_prefix, "meta-llama/Llama-Guard-4-12B")},
    ),
    "LlavaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
        extras={
            "mistral": os.path.join(models_path_prefix, "mistral-community/pixtral-12b"),
            "mistral-fp8": os.path.join(models_path_prefix, "nm-testing/pixtral-12b-FP8-dynamic"),
        },
    ),
    "LlavaNextForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")
    ),
    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")
    ),
    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    ),
    "MantisForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "TIGER-Lab/Mantis-8B-siglip-llama3"),
        max_transformers_version="4.48",
        transformers_version_reason={"hf": "HF model is not compatible."},
        hf_overrides={"architectures": ["MantisForConditionalGeneration"]},
    ),
    "MiDashengLMModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mispeech/midashenglm-7b"), trust_remote_code=True
    ),
    "MiniCPMO": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM-o-2_6"), trust_remote_code=True),
    "MiniCPMV": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5"),
        extras={
            "2.6": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-2_6"),
            "4.0": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4"),
            "4.5": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-4_5"),
        },
        trust_remote_code=True,
    ),
    "MiniMaxVL01ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-VL-01"),
        trust_remote_code=True,
    ),
    "Mistral3ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Small-3.1-24B-Instruct-2503"),
        extras={"fp8": os.path.join(models_path_prefix, "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic")},
    ),
    "MolmoForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/Molmo-7B-D-0924"),
        max_transformers_version="4.48",
        transformers_version_reason={
            "vllm": "Incorrectly-detected `tensorflow` import from processor."
        },
        extras={"olmo": os.path.join(models_path_prefix, "allenai/Molmo-7B-O-0924")},
        trust_remote_code=True,
    ),
    "NVLM_D": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/NVLM-D-72B"), trust_remote_code=True),
    "Llama_Nemotron_Nano_VL": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1"),
        trust_remote_code=True,
    ),
    "NemotronH_Nano_VL_V2": _HfExamplesInfo(
        os.path.join(models_path_prefix, "nano_vl_dummy"), is_available_online=False, trust_remote_code=True
    ),
    "OpenCUAForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "xlangai/OpenCUA-7B"), trust_remote_code=True
    ),
    "Ovis": _HfExamplesInfo(
        os.path.join(models_path_prefix, "AIDC-AI/Ovis2-1B"),
        trust_remote_code=True,
        max_transformers_version="4.53",
        transformers_version_reason={"hf": "HF model is not compatible"},
        extras={
            "1.6-llama": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Llama3.2-3B"),
            "1.6-gemma": os.path.join(models_path_prefix, "AIDC-AI/Ovis1.6-Gemma2-9B"),
        },
    ),
    "Ovis2_5": _HfExamplesInfo(os.path.join(models_path_prefix, "AIDC-AI/Ovis2.5-2B"), trust_remote_code=True),
    "PaddleOCRVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "PaddlePaddle/PaddleOCR-VL"),
        trust_remote_code=True,
    ),
    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/paligemma-3b-mix-224"),
        extras={"v2": os.path.join(models_path_prefix, "google/paligemma2-3b-ft-docci-448")},
    ),
    "Phi3VForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-3-vision-128k-instruct"),
        trust_remote_code=True,
        max_transformers_version="4.48",
        transformers_version_reason={
            "hf": "HF model use deprecated imports which have been removed."
        },  # noqa: E501
        extras={"phi3.5": os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")},
    ),
    "Phi4MMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct"), trust_remote_code=True
    ),
    "PixtralForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409"),
        extras={
            "mistral-large-3": os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-NVFP4"),
            "ministral-3": os.path.join(models_path_prefix, "mistralai/Ministral-3-3B-Instruct-2512"),
        },
        tokenizer_mode="mistral",
    ),
    "QwenVLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen-VL"),
        extras={"chat": os.path.join(models_path_prefix, "Qwen/Qwen-VL-Chat")},
        trust_remote_code=True,
        max_transformers_version="4.53.3",
        transformers_version_reason={
            "hf": "HF model uses deprecated imports which have been removed."
        },  # noqa: E501
        hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]},
    ),
    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2-Audio-7B-Instruct")
    ),
    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")),
    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-3B-Instruct"),
        max_model_len=4096,
    ),
    "Qwen2_5OmniModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-3B")),
    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B-AWQ")),
    "Qwen3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-4B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "Qwen3OmniMoeForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Omni-30B-A3B-Instruct"),
        max_model_len=4096,
        min_transformers_version="4.57",
    ),
    "RForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "YannQi/R-4B"), trust_remote_code=True),
    "SkyworkR1VChatModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B"), trust_remote_code=True
    ),
    "SmolVLMForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    ),
    "Step3VLForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "stepfun-ai/step3"), trust_remote_code=True
    ),
    "UltravoxModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),
        trust_remote_code=True,
    ),
    "TarsierForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "omni-research/Tarsier-7b")),
    "Tarsier2ForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "omni-research/Tarsier2-Recap-7b"),
        hf_overrides={
            # An architecture name is a class identifier, not a model
            # location, so it must not be joined with models_path_prefix.
            "architectures": ["Tarsier2ForConditionalGeneration"],
            "model_type": "tarsier2",
        },
    ),
    "VoxtralForConditionalGeneration": _HfExamplesInfo(
        "mistralai/Voxtral-Mini-3B-2507",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    "VoxtralStreamingGeneration": _HfExamplesInfo(
        "<place-holder>",
        # disable this temporarily until we support HF format
        is_available_online=False,
    ),
    # [Encoder-decoder]
    "NemotronParseForConditionalGeneration": _HfExamplesInfo(
        "nvidia/NVIDIA-Nemotron-Parse-v1.1", trust_remote_code=True
    ),
    "WhisperForConditionalGeneration": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openai/whisper-large-v3-turbo"),
        extras={"v3": os.path.join(models_path_prefix, "openai/whisper-large-v3")},
    ),
    # [Cross-encoder]
    "JinaVLForRanking": _HfExamplesInfo(os.path.join(models_path_prefix, "jinaai/jina-reranker-m0")),
}

957

958
# Example target/draft checkpoint pairs for speculative decoding, keyed by
# architecture name. Model and tokenizer locations are resolved under
# models_path_prefix exactly once; non-path values such as the speculative
# method name stay as plain strings.
_SPECULATIVE_DECODING_EXAMPLE_MODELS = {
    "MedusaModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "JackFram/llama-68m"),
        speculative_model=os.path.join(models_path_prefix, "abhigoyal/vllm-medusa-llama-68m-random"),
    ),
    # Temporarily disabled.
    # TODO(woosuk): Re-enable this once the MLP Speculator is supported in V1.
    # "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(
    #     "JackFram/llama-160m",
    #     speculative_model="ibm-ai-platform/llama-160m-accelerator"
    # ),
    "DeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "luccafong/deepseek_mtp_main_random"),
        speculative_model=os.path.join(models_path_prefix, "luccafong/deepseek_mtp_draft_random"),
        trust_remote_code=True,
    ),
    "EagleDeepSeekMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "eagle618/deepseek-v3-random"),
        speculative_model=os.path.join(models_path_prefix, "eagle618/eagle-deepseek-v3-random"),
        trust_remote_code=True,
    ),
    "EagleLlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct"),
    ),
    "Eagle3LlamaForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct"),
        use_original_num_layers=True,
        max_model_len=10240,
    ),
    "EagleMistralLarge3ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512"),
        speculative_model=os.path.join(models_path_prefix, "mistralai/Mistral-Large-3-675B-Instruct-2512-Eagle"),
        # TODO: revert once figuring out OOM in CI
        is_available_online=False,
    ),
    "LlamaForCausalLMEagle3": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "AngelSlim/Qwen3-8B_eagle3"),
        tokenizer=os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        use_original_num_layers=True,
    ),
    "EagleLlama4ForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct"),
        tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),
    ),
    "EagleMiniCPMForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "openbmb/MiniCPM-1B-sft-bf16"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
        # The speculative method is a method name, not a model location, so
        # it must not be joined with models_path_prefix.
        speculative_method="eagle",
        tokenizer=os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
    ),
    "ErnieMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "baidu/ERNIE-4.5-21B-A3B-PT"),
    ),
    "ExaoneMoeMTP": _HfExamplesInfo(
        "LGAI-EXAONE/K-EXAONE-236B-A23B",
        speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B",
        min_transformers_version="5.0.0",
    ),
    "Glm4MoeMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
        # Target and draft are the same checkpoint here, so the draft is
        # resolved under the same prefix as the target.
        speculative_model=os.path.join(models_path_prefix, "zai-org/GLM-4.5"),
    ),
    "LongCatFlashMTPModel": _HfExamplesInfo(
        # Apply the prefix once; nesting os.path.join duplicates it whenever
        # the prefix is a relative path.
        os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "meituan-longcat/LongCat-Flash-Chat"),
    ),
    "MiMoMTPModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
        trust_remote_code=True,
        speculative_model=os.path.join(models_path_prefix, "XiaomiMiMo/MiMo-7B-RL"),
    ),
    "Eagle3Qwen2_5vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-7B-Instruct"),
        speculative_model=os.path.join(models_path_prefix, "Rayzl/qwen2.5-vl-7b-eagle3-sgl"),
    ),
    "Eagle3Qwen3vlForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-8B-Instruct"),
        speculative_model=os.path.join(models_path_prefix, "taobao-mnn/Qwen3-VL-8B-Instruct-Eagle3"),
    ),
    "Qwen3NextMTP": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-Next-80B-A3B-Instruct"), min_transformers_version="4.56.3"
    ),
}

1055
# Example checkpoints for the Transformers* architectures, keyed by
# architecture name.
_TRANSFORMERS_BACKEND_MODELS = {
    "TransformersEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersForSequenceClassification": _HfExamplesInfo(
        os.path.join(
            models_path_prefix, "papluca/xlm-roberta-base-language-detection"
        ),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "hmellor/Ilama-3.2-1B"),
        trust_remote_code=True,
    ),
    "TransformersMultiModalForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "BAAI/Emu3-Chat-hf"),
    ),
    "TransformersMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-VL-30B-A3B-Instruct"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMoEEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMoEForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "Qwen/Qwen3-30B-A3B"),
        min_transformers_version="5.0.0.dev",
    ),
    "TransformersMultiModalEmbeddingModel": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it"),
    ),
    "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
        os.path.join(models_path_prefix, "google/gemma-3-4b-it"),
    ),
}

1085
1086
1087
# Combined registry of all example-model tables. When an architecture name
# appears in more than one table, the entry from the later table wins.
_EXAMPLE_MODELS = (
    _TEXT_GENERATION_EXAMPLE_MODELS
    | _EMBEDDING_EXAMPLE_MODELS
    | _SEQUENCE_CLASSIFICATION_EXAMPLE_MODELS
    | _MULTIMODAL_EXAMPLE_MODELS
    | _SPECULATIVE_DECODING_EXAMPLE_MODELS
    | _TRANSFORMERS_BACKEND_MODELS
)


class HfExampleModels:
    """Read-only lookup wrapper around a table of example models.

    The table maps an architecture name to the `_HfExamplesInfo` describing
    the checkpoint(s) registered for that architecture.
    """

    def __init__(self, hf_models: Mapping[str, _HfExamplesInfo]) -> None:
        super().__init__()

        self.hf_models = hf_models

    def get_supported_archs(self) -> Set[str]:
        """Return the architecture names present in the table."""
        return self.hf_models.keys()

    def get_hf_info(self, model_arch: str) -> _HfExamplesInfo:
        """Return the entry registered for `model_arch`.

        Raises:
            ValueError: If no entry exists for `model_arch`.
        """
        try:
            entry = self.hf_models[model_arch]
        except KeyError:
            # `from None` hides the KeyError so only the ValueError is shown.
            raise ValueError(
                f"No example model defined for {model_arch}; please update this file."
            ) from None
        return entry

    def find_hf_info(self, model_id: str) -> _HfExamplesInfo:
        """Return the first entry whose model matches `model_id`.

        Every entry's `default` is checked before any entry's `extras`, so a
        match on a default model always takes precedence.

        Raises:
            ValueError: If neither a default nor an extra model matches.
        """
        entries = tuple(self.hf_models.values())

        default_match = next(
            (entry for entry in entries if entry.default == model_id), None
        )
        if default_match is not None:
            return default_match

        # Fallback to extras
        extra_match = next(
            (entry for entry in entries if model_id in entry.extras.values()),
            None,
        )
        if extra_match is not None:
            return extra_match

        raise ValueError(
            f"No example model defined for {model_id}; please update this file."
        )
1125

1126

Patrick von Platen's avatar
Patrick von Platen committed
1127
# Lookup over the combined example-model table.
HF_EXAMPLE_MODELS = HfExampleModels(_EXAMPLE_MODELS)
# Lookup over the automatically converted example models only.
AUTO_EXAMPLE_MODELS = HfExampleModels(_AUTOMATIC_CONVERTED_MODELS)