Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
90e10dee
"vscode:/vscode.git/clone" did not exist on "ac2f3f7fee93cf9cd97c0078e362feab7b6c8299"
Commit
90e10dee
authored
Jun 09, 2025
by
zhuwenwen
Browse files
[tests] update models
parent
d7d4623e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
188 additions
and
182 deletions
+188
-182
tests/models/registry.py
tests/models/registry.py
+184
-180
tests/models/test_gptq_bitblas.py
tests/models/test_gptq_bitblas.py
+4
-2
No files found.
tests/models/registry.py
View file @
90e10dee
...
...
@@ -4,11 +4,13 @@ from collections.abc import Mapping, Set
from
dataclasses
import
dataclass
,
field
from
typing
import
Any
,
Literal
,
Optional
import
os
import
pytest
from
packaging.version
import
Version
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
@
dataclass
(
frozen
=
True
)
class
_HfExamplesInfo
:
default
:
str
...
...
@@ -107,302 +109,304 @@ class _HfExamplesInfo:
pytest
.
skip
(
msg
)
models_path_prefix
=
os
.
getenv
(
'VLLM_OPTEST_MODELS_PATH'
)
or
os
.
getenv
(
"OPTEST_MODELS_PATH"
)
# yapf: disable
_TEXT_GENERATION_EXAMPLE_MODELS
=
{
# [Decoder-only]
"AquilaModel"
:
_HfExamplesInfo
(
"BAAI/AquilaChat-7B"
,
"AquilaModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"BAAI/AquilaChat-7B"
)
,
trust_remote_code
=
True
),
"AquilaForCausalLM"
:
_HfExamplesInfo
(
"BAAI/AquilaChat2-7B"
,
"AquilaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"BAAI/AquilaChat2-7B"
)
,
trust_remote_code
=
True
),
"ArcticForCausalLM"
:
_HfExamplesInfo
(
"Snowflake/snowflake-arctic-instruct"
,
"ArcticForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Snowflake/snowflake-arctic-instruct"
)
,
trust_remote_code
=
True
),
"BaiChuanForCausalLM"
:
_HfExamplesInfo
(
"baichuan-inc/Baichuan-7B"
,
"BaiChuanForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"baichuan-inc/Baichuan-7B"
)
,
trust_remote_code
=
True
),
"BaichuanForCausalLM"
:
_HfExamplesInfo
(
"baichuan-inc/Baichuan2-7B-chat"
,
"BaichuanForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"baichuan-inc/Baichuan2-7B-chat"
)
,
trust_remote_code
=
True
),
"BambaForCausalLM"
:
_HfExamplesInfo
(
"ibm-ai-platform/Bamba-9B"
),
"BloomForCausalLM"
:
_HfExamplesInfo
(
"bigscience/bloom-560m"
,
"BambaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ibm-ai-platform/Bamba-9B"
)
)
,
"BloomForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"bigscience/bloom-560m"
)
,
{
"1b"
:
"bigscience/bloomz-1b1"
}),
"ChatGLMModel"
:
_HfExamplesInfo
(
"THUDM/chatglm3-6b"
,
"ChatGLMModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"THUDM/chatglm3-6b"
)
,
trust_remote_code
=
True
,
max_transformers_version
=
"4.48"
),
"ChatGLMForConditionalGeneration"
:
_HfExamplesInfo
(
"thu-coai/ShieldLM-6B-chatglm3"
,
# noqa: E501
"ChatGLMForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"thu-coai/ShieldLM-6B-chatglm3"
)
,
# noqa: E501
trust_remote_code
=
True
),
"CohereForCausalLM"
:
_HfExamplesInfo
(
"CohereForAI/c4ai-command-r-v01"
,
"CohereForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"CohereForAI/c4ai-command-r-v01"
)
,
trust_remote_code
=
True
),
"Cohere2ForCausalLM"
:
_HfExamplesInfo
(
"CohereForAI/c4ai-command-r7b-12-2024"
,
# noqa: E501
"Cohere2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"CohereForAI/c4ai-command-r7b-12-2024"
)
,
# noqa: E501
trust_remote_code
=
True
),
"DbrxForCausalLM"
:
_HfExamplesInfo
(
"databricks/dbrx-instruct"
),
"DeciLMForCausalLM"
:
_HfExamplesInfo
(
"nvidia/Llama-3_3-Nemotron-Super-49B-v1"
,
# noqa: E501
"DbrxForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"databricks/dbrx-instruct"
)
)
,
"DeciLMForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"nvidia/Llama-3_3-Nemotron-Super-49B-v1"
)
,
# noqa: E501
trust_remote_code
=
True
),
"DeepseekForCausalLM"
:
_HfExamplesInfo
(
"deepseek-ai/deepseek-llm-7b-chat"
),
"DeepseekV2ForCausalLM"
:
_HfExamplesInfo
(
"deepseek-ai/DeepSeek-V2-Lite-Chat"
,
# noqa: E501
"DeepseekForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"deepseek-ai/deepseek-llm-7b-chat"
)
)
,
"DeepseekV2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"deepseek-ai/DeepSeek-V2-Lite-Chat"
)
,
# noqa: E501
trust_remote_code
=
True
),
"DeepseekV3ForCausalLM"
:
_HfExamplesInfo
(
"deepseek-ai/DeepSeek-V3"
,
# noqa: E501
"DeepseekV3ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"deepseek-ai/DeepSeek-V3"
)
,
# noqa: E501
trust_remote_code
=
True
),
"ExaoneForCausalLM"
:
_HfExamplesInfo
(
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
),
# noqa: E501
"Fairseq2LlamaForCausalLM"
:
_HfExamplesInfo
(
"mgleize/fairseq2-dummy-Llama-3.2-1B"
),
# noqa: E501
"FalconForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-7b"
),
"GemmaForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-1.1-2b-it"
),
"Gemma2ForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-2-9b"
),
"Gemma3ForCausalLM"
:
_HfExamplesInfo
(
"google/gemma-3-1b-it"
),
"GlmForCausalLM"
:
_HfExamplesInfo
(
"THUDM/glm-4-9b-chat-hf"
),
"ExaoneForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
)
)
,
# noqa: E501
"Fairseq2LlamaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mgleize/fairseq2-dummy-Llama-3.2-1B"
)
)
,
# noqa: E501
"FalconForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"tiiuae/falcon-7b"
)
)
,
"GemmaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"google/gemma-1.1-2b-it"
)
)
,
"Gemma2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"google/gemma-2-9b"
)
)
,
"Gemma3ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"google/gemma-3-1b-it"
)
)
,
"GlmForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"THUDM/glm-4-9b-chat-hf"
)
)
,
"Glm4ForCausalLM"
:
_HfExamplesInfo
(
"THUDM/GLM-4-32B-0414"
,
is_available_online
=
False
,
min_transformers_version
=
"4.52.dev0"
),
"GPT2LMHeadModel"
:
_HfExamplesInfo
(
"openai-community/gpt2"
,
"GPT2LMHeadModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"openai-community/gpt2"
)
,
{
"alias"
:
"gpt2"
}),
"GPTBigCodeForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder"
,
{
"tiny"
:
"bigcode/tiny_starcoder_py"
}),
# noqa: E501
"GPTJForCausalLM"
:
_HfExamplesInfo
(
"Milos/slovak-gpt-j-405M"
,
{
"6b"
:
"EleutherAI/gpt-j-6b"
}),
"GPTNeoXForCausalLM"
:
_HfExamplesInfo
(
"EleutherAI/pythia-70m"
,
{
"1b"
:
"EleutherAI/pythia-1.4b"
}),
"GraniteForCausalLM"
:
_HfExamplesInfo
(
"ibm/PowerLM-3b"
),
"GraniteMoeForCausalLM"
:
_HfExamplesInfo
(
"ibm/PowerMoE-3b"
),
"GraniteMoeSharedForCausalLM"
:
_HfExamplesInfo
(
"ibm-research/moe-7b-1b-active-shared-experts"
),
# noqa: E501
"Grok1ModelForCausalLM"
:
_HfExamplesInfo
(
"hpcai-tech/grok-1"
,
trust_remote_code
=
True
),
"InternLMForCausalLM"
:
_HfExamplesInfo
(
"internlm/internlm-chat-7b"
,
trust_remote_code
=
True
),
"InternLM2ForCausalLM"
:
_HfExamplesInfo
(
"internlm/internlm2-chat-7b"
,
trust_remote_code
=
True
),
"InternLM2VEForCausalLM"
:
_HfExamplesInfo
(
"OpenGVLab/Mono-InternVL-2B"
,
trust_remote_code
=
True
),
"InternLM3ForCausalLM"
:
_HfExamplesInfo
(
"internlm/internlm3-8b-instruct"
,
trust_remote_code
=
True
),
"JAISLMHeadModel"
:
_HfExamplesInfo
(
"inceptionai/jais-13b-chat"
),
"JambaForCausalLM"
:
_HfExamplesInfo
(
"ai21labs/AI21-Jamba-1.5-Mini"
,
extras
=
{
"tiny"
:
"ai21labs/Jamba-tiny-dev"
}),
# noqa: E501
"LlamaForCausalLM"
:
_HfExamplesInfo
(
"meta-llama/Llama-3.2-1B-Instruct"
),
"LLaMAForCausalLM"
:
_HfExamplesInfo
(
"decapoda-research/llama-7b-hf"
,
"GPTBigCodeForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"bigcode/starcoder"
)
,
{
"tiny"
:
os
.
path
.
join
(
models_path_prefix
,
"bigcode/tiny_starcoder_py"
)
}),
# noqa: E501
"GPTJForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Milos/slovak-gpt-j-405M"
)
,
{
"6b"
:
os
.
path
.
join
(
models_path_prefix
,
"EleutherAI/gpt-j-6b"
)
}),
"GPTNeoXForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"EleutherAI/pythia-70m"
)
,
{
"1b"
:
os
.
path
.
join
(
models_path_prefix
,
"EleutherAI/pythia-1.4b"
)
}),
"GraniteForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ibm/PowerLM-3b"
)
)
,
"GraniteMoeForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ibm/PowerMoE-3b"
)
)
,
"GraniteMoeSharedForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ibm-research/moe-7b-1b-active-shared-experts"
)
)
,
# noqa: E501
"Grok1ModelForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"hpcai-tech/grok-1"
)
,
trust_remote_code
=
True
),
"InternLMForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"internlm/internlm-chat-7b"
)
,
trust_remote_code
=
True
),
"InternLM2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"internlm/internlm2-chat-7b"
)
,
trust_remote_code
=
True
),
"InternLM2VEForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"OpenGVLab/Mono-InternVL-2B"
)
,
trust_remote_code
=
True
),
"InternLM3ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"internlm/internlm3-8b-instruct"
)
,
trust_remote_code
=
True
),
"JAISLMHeadModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"inceptionai/jais-13b-chat"
)
)
,
"JambaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ai21labs/AI21-Jamba-1.5-Mini"
)
,
extras
=
{
"tiny"
:
os
.
path
.
join
(
models_path_prefix
,
"ai21labs/Jamba-tiny-dev"
)
}),
# noqa: E501
"LlamaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-1B-Instruct"
)
)
,
"LLaMAForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"decapoda-research/llama-7b-hf"
)
,
is_available_online
=
False
),
"MambaForCausalLM"
:
_HfExamplesInfo
(
"state-spaces/mamba-130m-hf"
),
"Mamba2ForCausalLM"
:
_HfExamplesInfo
(
"mistralai/Mamba-Codestral-7B-v0.1"
,
"MambaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"state-spaces/mamba-130m-hf"
)
)
,
"Mamba2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Mamba-Codestral-7B-v0.1"
)
,
is_available_online
=
False
),
"FalconMambaForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-mamba-7b-instruct"
),
# noqa: E501
"MiniCPMForCausalLM"
:
_HfExamplesInfo
(
"openbmb/MiniCPM-2B-sft-bf16"
,
"FalconMambaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"tiiuae/falcon-mamba-7b-instruct"
)
)
,
# noqa: E501
"MiniCPMForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"openbmb/MiniCPM-2B-sft-bf16"
)
,
trust_remote_code
=
True
),
"MiniCPM3ForCausalLM"
:
_HfExamplesInfo
(
"openbmb/MiniCPM3-4B"
,
"MiniCPM3ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"openbmb/MiniCPM3-4B"
)
,
trust_remote_code
=
True
),
"MiniMaxText01ForCausalLM"
:
_HfExamplesInfo
(
"MiniMaxAI/MiniMax-Text-01"
,
"MiniMaxText01ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"MiniMaxAI/MiniMax-Text-01"
)
,
trust_remote_code
=
True
),
"MistralForCausalLM"
:
_HfExamplesInfo
(
"mistralai/Mistral-7B-Instruct-v0.1"
),
"MixtralForCausalLM"
:
_HfExamplesInfo
(
"mistralai/Mixtral-8x7B-Instruct-v0.1"
,
# noqa: E501
{
"falcon3"
:
"ehristoforu/Falcon3-MoE-2x7B-Insruct"
}),
# noqa: E501
"QuantMixtralForCausalLM"
:
_HfExamplesInfo
(
"mistral-community/Mixtral-8x22B-v0.1-AWQ"
),
# noqa: E501
"MptForCausalLM"
:
_HfExamplesInfo
(
"mpt"
,
is_available_online
=
False
),
"MPTForCausalLM"
:
_HfExamplesInfo
(
"mosaicml/mpt-7b"
),
"NemotronForCausalLM"
:
_HfExamplesInfo
(
"nvidia/Minitron-8B-Base"
),
"OlmoForCausalLM"
:
_HfExamplesInfo
(
"allenai/OLMo-1B-hf"
),
"Olmo2ForCausalLM"
:
_HfExamplesInfo
(
"shanearora/OLMo-7B-1124-hf"
),
"OlmoeForCausalLM"
:
_HfExamplesInfo
(
"allenai/OLMoE-1B-7B-0924-Instruct"
),
"OPTForCausalLM"
:
_HfExamplesInfo
(
"facebook/opt-125m"
,
{
"1b"
:
"facebook/opt-iml-max-1.3b"
}),
"OrionForCausalLM"
:
_HfExamplesInfo
(
"OrionStarAI/Orion-14B-Chat"
,
"MistralForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Mistral-7B-Instruct-v0.1"
)
)
,
"MixtralForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Mixtral-8x7B-Instruct-v0.1"
)
,
# noqa: E501
{
"falcon3"
:
os
.
path
.
join
(
models_path_prefix
,
"ehristoforu/Falcon3-MoE-2x7B-Insruct"
)
}),
# noqa: E501
"QuantMixtralForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mistral-community/Mixtral-8x22B-v0.1-AWQ"
)
)
,
# noqa: E501
"MptForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mpt"
)
,
is_available_online
=
False
),
"MPTForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mosaicml/mpt-7b"
)
)
,
"NemotronForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"nvidia/Minitron-8B-Base"
)
)
,
"OlmoForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"allenai/OLMo-1B-hf"
)
)
,
"Olmo2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"shanearora/OLMo-7B-1124-hf"
)
)
,
"OlmoeForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"allenai/OLMoE-1B-7B-0924-Instruct"
)
)
,
"OPTForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"facebook/opt-125m"
)
,
{
"1b"
:
os
.
path
.
join
(
models_path_prefix
,
"facebook/opt-iml-max-1.3b"
)
}),
"OrionForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"OrionStarAI/Orion-14B-Chat"
)
,
trust_remote_code
=
True
),
"PersimmonForCausalLM"
:
_HfExamplesInfo
(
"adept/persimmon-8b-chat"
),
"PhiForCausalLM"
:
_HfExamplesInfo
(
"microsoft/phi-2"
),
"Phi3ForCausalLM"
:
_HfExamplesInfo
(
"microsoft/Phi-3-mini-4k-instruct"
),
"Phi3SmallForCausalLM"
:
_HfExamplesInfo
(
"microsoft/Phi-3-small-8k-instruct"
,
"PersimmonForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"adept/persimmon-8b-chat"
)
)
,
"PhiForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"microsoft/phi-2"
)
)
,
"Phi3ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Phi-3-mini-4k-instruct"
)
)
,
"Phi3SmallForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Phi-3-small-8k-instruct"
)
,
trust_remote_code
=
True
),
"PhiMoEForCausalLM"
:
_HfExamplesInfo
(
"microsoft/Phi-3.5-MoE-instruct"
,
"PhiMoEForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Phi-3.5-MoE-instruct"
)
,
trust_remote_code
=
True
),
"Plamo2ForCausalLM"
:
_HfExamplesInfo
(
"pfnet/plamo-2-1b"
,
"Plamo2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"pfnet/plamo-2-1b"
)
,
trust_remote_code
=
True
),
"QWenLMHeadModel"
:
_HfExamplesInfo
(
"Qwen/Qwen-7B-Chat"
,
"QWenLMHeadModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen-7B-Chat"
)
,
trust_remote_code
=
True
),
"Qwen2ForCausalLM"
:
_HfExamplesInfo
(
"Qwen/Qwen2-0.5B-Instruct"
,
extras
=
{
"2.5"
:
"Qwen/Qwen2.5-0.5B-Instruct"
}),
# noqa: E501
"Qwen2MoeForCausalLM"
:
_HfExamplesInfo
(
"Qwen/Qwen1.5-MoE-A2.7B-Chat"
),
"Qwen2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-0.5B-Instruct"
)
,
extras
=
{
"2.5"
:
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-0.5B-Instruct"
)
}),
# noqa: E501
"Qwen2MoeForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen1.5-MoE-A2.7B-Chat"
)
)
,
"Qwen3ForCausalLM"
:
_HfExamplesInfo
(
"Qwen/Qwen3-8B"
,
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen3-8B"
)
,
is_available_online
=
False
,
min_transformers_version
=
"4.51"
),
"Qwen3MoeForCausalLM"
:
_HfExamplesInfo
(
"Qwen/Qwen3-MoE-15B-A2B"
,
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen3-MoE-15B-A2B"
)
,
is_available_online
=
False
,
min_transformers_version
=
"4.51"
),
"RWForCausalLM"
:
_HfExamplesInfo
(
"tiiuae/falcon-40b"
,
"RWForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"tiiuae/falcon-40b"
)
,
is_available_online
=
False
),
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-zephyr-3b"
,
# noqa: E501
"StableLMEpochForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"stabilityai/stablelm-zephyr-3b"
)
,
# noqa: E501
is_available_online
=
False
),
"StableLmForCausalLM"
:
_HfExamplesInfo
(
"stabilityai/stablelm-3b-4e1t"
),
"Starcoder2ForCausalLM"
:
_HfExamplesInfo
(
"bigcode/starcoder2-3b"
),
"SolarForCausalLM"
:
_HfExamplesInfo
(
"upstage/solar-pro-preview-instruct"
),
"TeleChat2ForCausalLM"
:
_HfExamplesInfo
(
"Tele-AI/TeleChat2-3B"
,
"StableLmForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"stabilityai/stablelm-3b-4e1t"
)
)
,
"Starcoder2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"bigcode/starcoder2-3b"
)
)
,
"SolarForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"upstage/solar-pro-preview-instruct"
)
)
,
"TeleChat2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Tele-AI/TeleChat2-3B"
)
,
trust_remote_code
=
True
),
"TeleFLMForCausalLM"
:
_HfExamplesInfo
(
"CofeAI/FLM-2-52B-Instruct-2407"
,
"TeleFLMForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"CofeAI/FLM-2-52B-Instruct-2407"
)
,
trust_remote_code
=
True
),
"XverseForCausalLM"
:
_HfExamplesInfo
(
"xverse/XVERSE-7B-Chat"
,
"XverseForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"xverse/XVERSE-7B-Chat"
)
,
is_available_online
=
False
,
trust_remote_code
=
True
),
"Zamba2ForCausalLM"
:
_HfExamplesInfo
(
"Zyphra/Zamba2-7B-instruct"
),
"Zamba2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Zyphra/Zamba2-7B-instruct"
)
)
,
# [Encoder-decoder]
"BartModel"
:
_HfExamplesInfo
(
"facebook/bart-base"
),
"BartForConditionalGeneration"
:
_HfExamplesInfo
(
"facebook/bart-large-cnn"
),
"BartModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"facebook/bart-base"
)
)
,
"BartForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"facebook/bart-large-cnn"
)
)
,
}
_EMBEDDING_EXAMPLE_MODELS
=
{
# [Text-only]
"BertModel"
:
_HfExamplesInfo
(
"BAAI/bge-base-en-v1.5"
),
"Gemma2Model"
:
_HfExamplesInfo
(
"BAAI/bge-multilingual-gemma2"
),
"GritLM"
:
_HfExamplesInfo
(
"parasail-ai/GritLM-7B-vllm"
),
"GteModel"
:
_HfExamplesInfo
(
"Snowflake/snowflake-arctic-embed-m-v2.0"
,
trust_remote_code
=
True
),
"InternLM2ForRewardModel"
:
_HfExamplesInfo
(
"internlm/internlm2-1_8b-reward"
,
trust_remote_code
=
True
),
"JambaForSequenceClassification"
:
_HfExamplesInfo
(
"ai21labs/Jamba-tiny-reward-dev"
),
# noqa: E501
"LlamaModel"
:
_HfExamplesInfo
(
"llama"
,
is_available_online
=
False
),
"MistralModel"
:
_HfExamplesInfo
(
"intfloat/e5-mistral-7b-instruct"
),
"NomicBertModel"
:
_HfExamplesInfo
(
"Snowflake/snowflake-arctic-embed-m-long"
,
# noqa: E501
trust_remote_code
=
True
),
"Qwen2Model"
:
_HfExamplesInfo
(
"ssmits/Qwen2-7B-Instruct-embed-base"
),
"Qwen2ForRewardModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Math-RM-72B"
),
"Qwen2ForProcessRewardModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Math-PRM-7B"
),
"Qwen2ForSequenceClassification"
:
_HfExamplesInfo
(
"jason9693/Qwen2.5-1.5B-apeach"
),
# noqa: E501
"RobertaModel"
:
_HfExamplesInfo
(
"sentence-transformers/stsb-roberta-base-v2"
),
# noqa: E501
"RobertaForMaskedLM"
:
_HfExamplesInfo
(
"sentence-transformers/all-roberta-large-v1"
),
# noqa: E501
"XLMRobertaModel"
:
_HfExamplesInfo
(
"intfloat/multilingual-e5-small"
),
"BertModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"BAAI/bge-base-en-v1.5"
)
)
,
"Gemma2Model"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"BAAI/bge-multilingual-gemma2"
)
)
,
"GritLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"parasail-ai/GritLM-7B-vllm"
)
)
,
"GteModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Snowflake/snowflake-arctic-embed-m-v2.0"
)
,
trust_remote_code
=
True
),
"InternLM2ForRewardModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"internlm/internlm2-1_8b-reward"
)
,
trust_remote_code
=
True
),
"JambaForSequenceClassification"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ai21labs/Jamba-tiny-reward-dev"
)
)
,
# noqa: E501
"LlamaModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"llama"
)
,
is_available_online
=
False
),
"MistralModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"intfloat/e5-mistral-7b-instruct"
)
)
,
"NomicBertModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Snowflake/snowflake-arctic-embed-m-long"
)
,
# noqa: E501
trust_remote_code
=
True
),
"Qwen2Model"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ssmits/Qwen2-7B-Instruct-embed-base"
)
)
,
"Qwen2ForRewardModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-Math-RM-72B"
)
)
,
"Qwen2ForProcessRewardModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-Math-PRM-7B"
)
)
,
"Qwen2ForSequenceClassification"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"jason9693/Qwen2.5-1.5B-apeach"
)
)
,
# noqa: E501
"RobertaModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"sentence-transformers/stsb-roberta-base-v2"
)
)
,
# noqa: E501
"RobertaForMaskedLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"sentence-transformers/all-roberta-large-v1"
)
)
,
# noqa: E501
"XLMRobertaModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"intfloat/multilingual-e5-small"
)
)
,
# [Multimodal]
"LlavaNextForConditionalGeneration"
:
_HfExamplesInfo
(
"royokong/e5-v"
),
"Phi3VForCausalLM"
:
_HfExamplesInfo
(
"TIGER-Lab/VLM2Vec-Full"
,
"LlavaNextForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"royokong/e5-v"
)
)
,
"Phi3VForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"TIGER-Lab/VLM2Vec-Full"
)
,
trust_remote_code
=
True
),
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"MrLight/dse-qwen2-2b-mrl-v1"
),
# noqa: E501
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"MrLight/dse-qwen2-2b-mrl-v1"
)
)
,
# noqa: E501
# The model on Huggingface is currently being updated,
# hence I temporarily mark it as not available online
"PrithviGeoSpatialMAE"
:
_HfExamplesInfo
(
"ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
,
# noqa: E501
"PrithviGeoSpatialMAE"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"
)
,
# noqa: E501
is_available_online
=
False
),
}
_CROSS_ENCODER_EXAMPLE_MODELS
=
{
# [Text-only]
"BertForSequenceClassification"
:
_HfExamplesInfo
(
"cross-encoder/ms-marco-MiniLM-L-6-v2"
),
# noqa: E501
"RobertaForSequenceClassification"
:
_HfExamplesInfo
(
"cross-encoder/quora-roberta-base"
),
# noqa: E501
"XLMRobertaForSequenceClassification"
:
_HfExamplesInfo
(
"BAAI/bge-reranker-v2-m3"
),
# noqa: E501
"ModernBertForSequenceClassification"
:
_HfExamplesInfo
(
"Alibaba-NLP/gte-reranker-modernbert-base"
),
# noqa: E501
"BertForSequenceClassification"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"cross-encoder/ms-marco-MiniLM-L-6-v2"
)
)
,
# noqa: E501
"RobertaForSequenceClassification"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"cross-encoder/quora-roberta-base"
)
)
,
# noqa: E501
"XLMRobertaForSequenceClassification"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"BAAI/bge-reranker-v2-m3"
)
)
,
# noqa: E501
"ModernBertForSequenceClassification"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Alibaba-NLP/gte-reranker-modernbert-base"
)
)
,
# noqa: E501
}
_MULTIMODAL_EXAMPLE_MODELS
=
{
# [Decoder-only]
"AriaForConditionalGeneration"
:
_HfExamplesInfo
(
"rhymes-ai/Aria"
),
"AyaVisionForConditionalGeneration"
:
_HfExamplesInfo
(
"CohereForAI/aya-vision-8b"
),
# noqa: E501
"Blip2ForConditionalGeneration"
:
_HfExamplesInfo
(
"Salesforce/blip2-opt-2.7b"
,
# noqa: E501
extras
=
{
"6b"
:
"Salesforce/blip2-opt-6.7b"
}),
# noqa: E501
"ChameleonForConditionalGeneration"
:
_HfExamplesInfo
(
"facebook/chameleon-7b"
),
# noqa: E501
"DeepseekVLV2ForCausalLM"
:
_HfExamplesInfo
(
"deepseek-ai/deepseek-vl2-tiny"
,
# noqa: E501
extras
=
{
"fork"
:
"Isotr0py/deepseek-vl2-tiny"
},
# noqa: E501
"AriaForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"rhymes-ai/Aria"
)
)
,
"AyaVisionForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"CohereForAI/aya-vision-8b"
)
)
,
# noqa: E501
"Blip2ForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Salesforce/blip2-opt-2.7b"
)
,
# noqa: E501
extras
=
{
"6b"
:
os
.
path
.
join
(
models_path_prefix
,
"Salesforce/blip2-opt-6.7b"
)
}),
# noqa: E501
"ChameleonForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"facebook/chameleon-7b"
)
)
,
# noqa: E501
"DeepseekVLV2ForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"deepseek-ai/deepseek-vl2-tiny"
)
,
# noqa: E501
extras
=
{
"fork"
:
os
.
path
.
join
(
models_path_prefix
,
"Isotr0py/deepseek-vl2-tiny"
)
},
# noqa: E501
max_transformers_version
=
"4.48"
,
# noqa: E501
transformers_version_reason
=
"HF model is not compatible."
,
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"DeepseekVLV2ForCausalLM"
]}),
# noqa: E501
"FuyuForCausalLM"
:
_HfExamplesInfo
(
"adept/fuyu-8b"
),
"Gemma3ForConditionalGeneration"
:
_HfExamplesInfo
(
"google/gemma-3-4b-it"
),
"GraniteSpeechForConditionalGeneration"
:
_HfExamplesInfo
(
"ibm-granite/granite-speech-3.3-8b"
,
# noqa: E501
"FuyuForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"adept/fuyu-8b"
)
)
,
"Gemma3ForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"google/gemma-3-4b-it"
)
)
,
"GraniteSpeechForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ibm-granite/granite-speech-3.3-8b"
)
,
# noqa: E501
min_transformers_version
=
"4.52.0"
),
# noqa: E501
"GLM4VForCausalLM"
:
_HfExamplesInfo
(
"THUDM/glm-4v-9b"
,
"GLM4VForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"THUDM/glm-4v-9b"
)
,
trust_remote_code
=
True
,
hf_overrides
=
{
"architectures"
:
[
"GLM4VForCausalLM"
]}),
# noqa: E501
"H2OVLChatModel"
:
_HfExamplesInfo
(
"h2oai/h2ovl-mississippi-800m"
,
extras
=
{
"2b"
:
"h2oai/h2ovl-mississippi-2b"
},
# noqa: E501
"H2OVLChatModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"h2oai/h2ovl-mississippi-800m"
)
,
extras
=
{
"2b"
:
os
.
path
.
join
(
models_path_prefix
,
"h2oai/h2ovl-mississippi-2b"
)
},
# noqa: E501
max_transformers_version
=
"4.48"
,
# noqa: E501
transformers_version_reason
=
"HF model is not compatible."
),
# noqa: E501
"InternVLChatModel"
:
_HfExamplesInfo
(
"OpenGVLab/InternVL2-1B"
,
extras
=
{
"2B"
:
"OpenGVLab/InternVL2-
2
B"
},
# noqa: E501
"InternVLChatModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"OpenGVLab/InternVL2-1B"
)
,
extras
=
{
"2B"
:
os
.
path
.
join
(
models_path_prefix
,
"OpenGVLab/InternVL2-
1
B"
)
},
# noqa: E501
trust_remote_code
=
True
),
"Idefics3ForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceM4/Idefics3-8B-Llama3"
,
# noqa: E501
{
"tiny"
:
"HuggingFaceTB/SmolVLM-256M-Instruct"
}),
# noqa: E501
"KimiVLForConditionalGeneration"
:
_HfExamplesInfo
(
"moonshotai/Kimi-VL-A3B-Instruct"
,
# noqa: E501
extras
=
{
"thinking"
:
"moonshotai/Kimi-VL-A3B-Thinking"
},
# noqa: E501
"Idefics3ForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"HuggingFaceM4/Idefics3-8B-Llama3"
)
,
# noqa: E501
{
"tiny"
:
os
.
path
.
join
(
models_path_prefix
,
"HuggingFaceTB/SmolVLM-256M-Instruct"
)
}),
# noqa: E501
"KimiVLForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"moonshotai/Kimi-VL-A3B-Instruct"
)
,
# noqa: E501
extras
=
{
"thinking"
:
os
.
path
.
join
(
models_path_prefix
,
"moonshotai/Kimi-VL-A3B-Thinking"
)
},
# noqa: E501
trust_remote_code
=
True
),
"Llama4ForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
,
# noqa: E501
"Llama4ForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
)
,
# noqa: E501
min_transformers_version
=
"4.51"
),
"LlavaForConditionalGeneration"
:
_HfExamplesInfo
(
"llava-hf/llava-1.5-7b-hf"
,
extras
=
{
"mistral"
:
"mistral-community/pixtral-12b"
,
# noqa: E501
"mistral-fp8"
:
"nm-testing/pixtral-12b-FP8-dynamic"
}),
# noqa: E501
"LlavaNextForConditionalGeneration"
:
_HfExamplesInfo
(
"llava-hf/llava-v1.6-mistral-7b-hf"
),
# noqa: E501
"LlavaNextVideoForConditionalGeneration"
:
_HfExamplesInfo
(
"llava-hf/LLaVA-NeXT-Video-7B-hf"
),
# noqa: E501
"LlavaOnevisionForConditionalGeneration"
:
_HfExamplesInfo
(
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
),
# noqa: E501
"MantisForConditionalGeneration"
:
_HfExamplesInfo
(
"TIGER-Lab/Mantis-8B-siglip-llama3"
,
# noqa: E501
"LlavaForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/llava-1.5-7b-hf"
)
,
extras
=
{
"mistral"
:
os
.
path
.
join
(
models_path_prefix
,
"mistral-community/pixtral-12b"
)
,
# noqa: E501
"mistral-fp8"
:
os
.
path
.
join
(
models_path_prefix
,
"nm-testing/pixtral-12b-FP8-dynamic"
)
}),
# noqa: E501
"LlavaNextForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/llava-v1.6-mistral-7b-hf"
)
)
,
# noqa: E501
"LlavaNextVideoForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/LLaVA-NeXT-Video-7B-hf"
)
)
,
# noqa: E501
"LlavaOnevisionForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
)
)
,
# noqa: E501
"MantisForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"TIGER-Lab/Mantis-8B-siglip-llama3"
)
,
# noqa: E501
max_transformers_version
=
"4.48"
,
# noqa: E501
transformers_version_reason
=
"HF model is not compatible."
,
# noqa: E501
hf_overrides
=
{
"architectures"
:
[
"MantisForConditionalGeneration"
]}),
# noqa: E501
"MiniCPMO"
:
_HfExamplesInfo
(
"openbmb/MiniCPM-o-2_6"
,
"MiniCPMO"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"openbmb/MiniCPM-o-2_6"
)
,
max_transformers_version
=
"4.48"
,
transformers_version_reason
=
"Use of deprecated imports which have been removed."
,
# noqa: E501
trust_remote_code
=
True
),
"MiniCPMV"
:
_HfExamplesInfo
(
"openbmb/MiniCPM-Llama3-V-2_5"
,
extras
=
{
"2.6"
:
"openbmb/MiniCPM-V-2_6"
},
# noqa: E501
"MiniCPMV"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"openbmb/MiniCPM-Llama3-V-2_5"
)
,
extras
=
{
"2.6"
:
os
.
path
.
join
(
models_path_prefix
,
"openbmb/MiniCPM-V-2_6"
)
},
# noqa: E501
trust_remote_code
=
True
),
"Mistral3ForConditionalGeneration"
:
_HfExamplesInfo
(
"mistralai/Mistral-Small-3.1-24B-Instruct-2503"
,
# noqa: E501
extras
=
{
"fp8"
:
"nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic"
}),
# noqa: E501
"MolmoForCausalLM"
:
_HfExamplesInfo
(
"allenai/Molmo-7B-D-0924"
,
"Mistral3ForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Mistral-Small-3.1-24B-Instruct-2503"
)
,
# noqa: E501
extras
=
{
"fp8"
:
os
.
path
.
join
(
models_path_prefix
,
"nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic"
)
}),
# noqa: E501
"MolmoForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"allenai/Molmo-7B-D-0924"
)
,
max_transformers_version
=
"4.48"
,
transformers_version_reason
=
"Incorrectly-detected `tensorflow` import."
,
# noqa: E501
extras
=
{
"olmo"
:
"allenai/Molmo-7B-O-0924"
},
# noqa: E501
extras
=
{
"olmo"
:
os
.
path
.
join
(
models_path_prefix
,
"allenai/Molmo-7B-O-0924"
)
},
# noqa: E501
trust_remote_code
=
True
),
"NVLM_D"
:
_HfExamplesInfo
(
"nvidia/NVLM-D-72B"
,
"NVLM_D"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"nvidia/NVLM-D-72B"
)
,
trust_remote_code
=
True
),
"PaliGemmaForConditionalGeneration"
:
_HfExamplesInfo
(
"google/paligemma-3b-mix-224"
,
# noqa: E501
extras
=
{
"v2"
:
"google/paligemma2-3b-ft-docci-448"
}),
# noqa: E501
"Phi3VForCausalLM"
:
_HfExamplesInfo
(
"microsoft/Phi-3-vision-128k-instruct"
,
"PaliGemmaForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"google/paligemma-3b-mix-224"
)
,
# noqa: E501
extras
=
{
"v2"
:
os
.
path
.
join
(
models_path_prefix
,
"google/paligemma2-3b-ft-docci-448"
)
}),
# noqa: E501
"Phi3VForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Phi-3-vision-128k-instruct"
)
,
trust_remote_code
=
True
,
max_transformers_version
=
"4.48"
,
transformers_version_reason
=
"Use of deprecated imports which have been removed."
,
# noqa: E501
extras
=
{
"phi3.5"
:
"microsoft/Phi-3.5-vision-instruct"
}),
# noqa: E501
"Phi4MMForCausalLM"
:
_HfExamplesInfo
(
"microsoft/Phi-4-multimodal-instruct"
,
extras
=
{
"phi3.5"
:
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Phi-3.5-vision-instruct"
),
}),
# noqa: E501
"Phi4MMForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Phi-4-multimodal-instruct"
)
,
trust_remote_code
=
True
),
"PixtralForConditionalGeneration"
:
_HfExamplesInfo
(
"mistralai/Pixtral-12B-2409"
,
# noqa: E501
"PixtralForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"mistralai/Pixtral-12B-2409"
)
,
# noqa: E501
tokenizer_mode
=
"mistral"
),
"QwenVLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen-VL"
,
extras
=
{
"chat"
:
"Qwen/Qwen-VL-Chat"
},
# noqa: E501
"QwenVLForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen-VL"
)
,
extras
=
{
"chat"
:
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen-VL-Chat"
)
},
# noqa: E501
trust_remote_code
=
True
,
hf_overrides
=
{
"architectures"
:
[
"QwenVLForConditionalGeneration"
]}),
# noqa: E501
"Qwen2AudioForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-Audio-7B-Instruct"
),
# noqa: E501
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2-VL-2B-Instruct"
),
# noqa: E501
"Qwen2_5_VLForConditionalGeneration"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-VL-3B-Instruct"
),
# noqa: E501
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
"Qwen/Qwen2.5-Omni-7B"
,
# noqa: E501
"Qwen2AudioForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-Audio-7B-Instruct"
)
)
,
# noqa: E501
"Qwen2VLForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-VL-2B-Instruct"
)
)
,
# noqa: E501
"Qwen2_5_VLForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-VL-3B-Instruct"
)
)
,
# noqa: E501
"Qwen2_5OmniModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2.5-Omni-7B"
)
,
# noqa: E501
min_transformers_version
=
"4.52"
),
# noqa: E501
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
"Skywork/Skywork-R1V-38B"
),
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
),
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
,
# noqa: E501
"SkyworkR1VChatModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"Skywork/Skywork-R1V-38B"
)
)
,
"SmolVLMForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"HuggingFaceTB/SmolVLM2-2.2B-Instruct"
)
)
,
# noqa: E501
"UltravoxModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"fixie-ai/ultravox-v0_5-llama-3_2-1b"
)
,
# noqa: E501
trust_remote_code
=
True
),
# [Encoder-decoder]
# Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
# Therefore, we borrow the BartTokenizer from the original Bart model
"Florence2ForConditionalGeneration"
:
_HfExamplesInfo
(
"microsoft/Florence-2-base"
,
# noqa: E501
tokenizer
=
"Isotr0py/Florence-2-tokenizer"
,
"Florence2ForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"microsoft/Florence-2-base"
)
,
# noqa: E501
tokenizer
=
os
.
path
.
join
(
models_path_prefix
,
"Isotr0py/Florence-2-tokenizer"
)
,
trust_remote_code
=
True
),
# noqa: E501
"MllamaForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-3.2-11B-Vision-Instruct"
),
# noqa: E501
"Llama4ForConditionalGeneration"
:
_HfExamplesInfo
(
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
),
# noqa: E501
"WhisperForConditionalGeneration"
:
_HfExamplesInfo
(
"openai/whisper-large-v3"
),
# noqa: E501
"MllamaForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.2-11B-Vision-Instruct"
)
)
,
# noqa: E501
"Llama4ForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
)
)
,
# noqa: E501
"WhisperForConditionalGeneration"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"openai/whisper-large-v3"
)
)
,
# noqa: E501
}
_SPECULATIVE_DECODING_EXAMPLE_MODELS
=
{
"EAGLEModel"
:
_HfExamplesInfo
(
"JackFram/llama-68m"
,
speculative_model
=
"abhigoyal/vllm-eagle-llama-68m-random"
),
# noqa: E501
"MedusaModel"
:
_HfExamplesInfo
(
"JackFram/llama-68m"
,
speculative_model
=
"abhigoyal/vllm-medusa-llama-68m-random"
),
# noqa: E501
"MLPSpeculatorPreTrainedModel"
:
_HfExamplesInfo
(
"JackFram/llama-160m"
,
speculative_model
=
"ibm-ai-platform/llama-160m-accelerator"
),
# noqa: E501
"DeepSeekMTPModel"
:
_HfExamplesInfo
(
"luccafong/deepseek_mtp_main_random"
,
speculative_model
=
"luccafong/deepseek_mtp_draft_random"
,
# noqa: E501
trust_remote_code
=
True
),
"EagleLlamaForCausalLM"
:
_HfExamplesInfo
(
"yuhuili/EAGLE-LLaMA3-Instruct-8B"
,
"EAGLEModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"JackFram/llama-68m"
)
,
speculative_model
=
os
.
path
.
join
(
models_path_prefix
,
"abhigoyal/vllm-eagle-llama-68m-random"
)
)
,
# noqa: E501
"MedusaModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"JackFram/llama-68m"
)
,
speculative_model
=
os
.
path
.
join
(
models_path_prefix
,
"abhigoyal/vllm-medusa-llama-68m-random"
)
)
,
# noqa: E501
"MLPSpeculatorPreTrainedModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"JackFram/llama-160m"
)
,
speculative_model
=
os
.
path
.
join
(
models_path_prefix
,
"ibm-ai-platform/llama-160m-accelerator"
)
)
,
# noqa: E501
"DeepSeekMTPModel"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"luccafong/deepseek_mtp_main_random"
)
,
speculative_model
=
os
.
path
.
join
(
models_path_prefix
,
"luccafong/deepseek_mtp_draft_random"
)
,
# noqa: E501
trust_remote_code
=
True
),
"EagleLlamaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"yuhuili/EAGLE-LLaMA3-Instruct-8B"
)
,
trust_remote_code
=
True
,
speculative_model
=
"yuhuili/EAGLE-LLaMA3-Instruct-8B"
,
tokenizer
=
"meta-llama/Meta-Llama-3-8B-Instruct"
),
# noqa: E501
"Eagle3LlamaForCausalLM"
:
_HfExamplesInfo
(
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
,
# noqa: E501
speculative_model
=
os
.
path
.
join
(
models_path_prefix
,
"yuhuili/EAGLE-LLaMA3-Instruct-8B"
)
,
tokenizer
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Meta-Llama-3-8B-Instruct"
)
)
,
# noqa: E501
"Eagle3LlamaForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
)
,
# noqa: E501
trust_remote_code
=
True
,
speculative_model
=
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
,
tokenizer
=
"meta-llama/Llama-3.1-8B-Instruct"
),
speculative_model
=
os
.
path
.
join
(
models_path_prefix
,
"yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"
)
,
tokenizer
=
os
.
path
.
join
(
models_path_prefix
,
"meta-llama/Llama-3.1-8B-Instruct"
)
)
,
}
_TRANSFORMERS_MODELS
=
{
"TransformersForCausalLM"
:
_HfExamplesInfo
(
"ArthurZ/Ilama-3.2-1B"
,
trust_remote_code
=
True
),
# noqa: E501
"TransformersForCausalLM"
:
_HfExamplesInfo
(
os
.
path
.
join
(
models_path_prefix
,
"ArthurZ/Ilama-3.2-1B"
)
,
trust_remote_code
=
True
),
# noqa: E501
}
_EXAMPLE_MODELS
=
{
...
...
tests/models/test_gptq_bitblas.py
View file @
90e10dee
...
...
@@ -13,9 +13,11 @@ Run `pytest tests/models/test_bitblas.py`.
"""
from
dataclasses
import
dataclass
import
os
import
pytest
from
.utils
import
check_logprobs_close
from
..utils
import
models_path_prefix
@
dataclass
...
...
@@ -24,7 +26,7 @@ class ModelPair:
model_pairs
=
[
ModelPair
(
model_gptq
=
"hxbgsyxh/opt-125m-4bit-128g"
),
ModelPair
(
model_gptq
=
os
.
path
.
join
(
models_path_prefix
,
"hxbgsyxh/opt-125m-4bit-128g"
)
)
,
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment