[tests] update models

90e10dee · zhuwenwen · d7d4623e · 90e10dee · 90e10dee
Commit 90e10dee authored Jun 09, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 188 additions and 182 deletions

tests/models/registry.py tests/models/registry.py +184 -180

tests/models/test_gptq_bitblas.py tests/models/test_gptq_bitblas.py +4 -2

No files found.
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -4,11 +4,13 @@ from collections.abc import Mapping, Set
 from dataclasses import dataclass, field
 from typing import Any, Literal, Optional

+import os
 import pytest
 from packaging.version import Version
 from transformers import __version__ as TRANSFORMERS_VERSION


+
 @dataclass(frozen=True)
 class _HfExamplesInfo:
    default: str
@@ -107,302 +109,304 @@ class _HfExamplesInfo:
                pytest.skip(msg)


+models_path_prefix = os.getenv('VLLM_OPTEST_MODELS_PATH') or os.getenv("OPTEST_MODELS_PATH")
+
 # yapf: disable
 _TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]
-    "AquilaModel": _HfExamplesInfo("BAAI/AquilaChat-7B",
+    "AquilaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat-7B"),
                                   trust_remote_code=True),
-    "AquilaForCausalLM": _HfExamplesInfo("BAAI/AquilaChat2-7B",
+    "AquilaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/AquilaChat2-7B"),
                                         trust_remote_code=True),
-    "ArcticForCausalLM": _HfExamplesInfo("Snowflake/snowflake-arctic-instruct",
+    "ArcticForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-instruct"),
                                         trust_remote_code=True),
-    "BaiChuanForCausalLM": _HfExamplesInfo("baichuan-inc/Baichuan-7B",
+    "BaiChuanForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baichuan-inc/Baichuan-7B"),
                                         trust_remote_code=True),
-    "BaichuanForCausalLM": _HfExamplesInfo("baichuan-inc/Baichuan2-7B-chat",
+    "BaichuanForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "baichuan-inc/Baichuan2-7B-chat"),
                                         trust_remote_code=True),
-    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B"),
-    "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
+    "BambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm-ai-platform/Bamba-9B")),
+    "BloomForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "bigscience/bloom-560m"),
                                        {"1b": "bigscience/bloomz-1b1"}),
-    "ChatGLMModel": _HfExamplesInfo("THUDM/chatglm3-6b",
+    "ChatGLMModel": _HfExamplesInfo(os.path.join(models_path_prefix, "THUDM/chatglm3-6b"),
                                    trust_remote_code=True,
                                    max_transformers_version="4.48"),
-    "ChatGLMForConditionalGeneration": _HfExamplesInfo("thu-coai/ShieldLM-6B-chatglm3",  # noqa: E501
+    "ChatGLMForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "thu-coai/ShieldLM-6B-chatglm3"),  # noqa: E501
                                                       trust_remote_code=True),
-    "CohereForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r-v01",
+    "CohereForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "CohereForAI/c4ai-command-r-v01"),
                                         trust_remote_code=True),
-    "Cohere2ForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r7b-12-2024", # noqa: E501
+    "Cohere2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "CohereForAI/c4ai-command-r7b-12-2024"), # noqa: E501
                                         trust_remote_code=True),
-    "DbrxForCausalLM": _HfExamplesInfo("databricks/dbrx-instruct"),
-    "DeciLMForCausalLM": _HfExamplesInfo("nvidia/Llama-3_3-Nemotron-Super-49B-v1", # noqa: E501
+    "DbrxForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "databricks/dbrx-instruct")),
+    "DeciLMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/Llama-3_3-Nemotron-Super-49B-v1"), # noqa: E501
                                         trust_remote_code=True),
-    "DeepseekForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-llm-7b-chat"),
-    "DeepseekV2ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V2-Lite-Chat",  # noqa: E501
+    "DeepseekForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/deepseek-llm-7b-chat")),
+    "DeepseekV2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V2-Lite-Chat"),  # noqa: E501
                                         trust_remote_code=True),
-    "DeepseekV3ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V3",  # noqa: E501
+    "DeepseekV3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/DeepSeek-V3"),  # noqa: E501
                                         trust_remote_code=True),
-    "ExaoneForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"),  # noqa: E501
-    "Fairseq2LlamaForCausalLM": _HfExamplesInfo("mgleize/fairseq2-dummy-Llama-3.2-1B"),  # noqa: E501
-    "FalconForCausalLM": _HfExamplesInfo("tiiuae/falcon-7b"),
-    "GemmaForCausalLM": _HfExamplesInfo("google/gemma-1.1-2b-it"),
-    "Gemma2ForCausalLM": _HfExamplesInfo("google/gemma-2-9b"),
-    "Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it"),
-    "GlmForCausalLM": _HfExamplesInfo("THUDM/glm-4-9b-chat-hf"),
+    "ExaoneForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct")),  # noqa: E501
+    "Fairseq2LlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mgleize/fairseq2-dummy-Llama-3.2-1B")),  # noqa: E501
+    "FalconForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-7b")),
+    "GemmaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")),
+    "Gemma2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-2-9b")),
+    "Gemma3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-1b-it")),
+    "GlmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "THUDM/glm-4-9b-chat-hf")),
    "Glm4ForCausalLM": _HfExamplesInfo(
        "THUDM/GLM-4-32B-0414",
        is_available_online=False,
        min_transformers_version="4.52.dev0"
    ),
-    "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2",
+    "GPT2LMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "openai-community/gpt2"),
                                       {"alias": "gpt2"}),
-    "GPTBigCodeForCausalLM": _HfExamplesInfo("bigcode/starcoder",
-                                             {"tiny": "bigcode/tiny_starcoder_py"}),  # noqa: E501
-    "GPTJForCausalLM": _HfExamplesInfo("Milos/slovak-gpt-j-405M",
-                                       {"6b": "EleutherAI/gpt-j-6b"}),
-    "GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m",
-                                          {"1b": "EleutherAI/pythia-1.4b"}),
-    "GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
-    "GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
-    "GraniteMoeSharedForCausalLM": _HfExamplesInfo("ibm-research/moe-7b-1b-active-shared-experts"),  # noqa: E501
-    "Grok1ModelForCausalLM": _HfExamplesInfo("hpcai-tech/grok-1",
-                                             trust_remote_code=True),
-    "InternLMForCausalLM": _HfExamplesInfo("internlm/internlm-chat-7b",
-                                           trust_remote_code=True),
-    "InternLM2ForCausalLM": _HfExamplesInfo("internlm/internlm2-chat-7b",
-                                            trust_remote_code=True),
-    "InternLM2VEForCausalLM": _HfExamplesInfo("OpenGVLab/Mono-InternVL-2B",
-                                              trust_remote_code=True),
-    "InternLM3ForCausalLM": _HfExamplesInfo("internlm/internlm3-8b-instruct",
-                                            trust_remote_code=True),
-    "JAISLMHeadModel": _HfExamplesInfo("inceptionai/jais-13b-chat"),
-    "JambaForCausalLM": _HfExamplesInfo("ai21labs/AI21-Jamba-1.5-Mini",
-                                        extras={"tiny": "ai21labs/Jamba-tiny-dev"}),  # noqa: E501
-    "LlamaForCausalLM": _HfExamplesInfo("meta-llama/Llama-3.2-1B-Instruct"),
-    "LLaMAForCausalLM": _HfExamplesInfo("decapoda-research/llama-7b-hf",
+    "GPTBigCodeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "bigcode/starcoder"),
+                                             {"tiny": os.path.join(models_path_prefix, "bigcode/tiny_starcoder_py")}),  # noqa: E501
+    "GPTJForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Milos/slovak-gpt-j-405M"),
+                                       {"6b": os.path.join(models_path_prefix, "EleutherAI/gpt-j-6b")}),
+    "GPTNeoXForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "EleutherAI/pythia-70m"),
+                                          {"1b": os.path.join(models_path_prefix, "EleutherAI/pythia-1.4b")}),
+    "GraniteForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm/PowerLM-3b")),
+    "GraniteMoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm/PowerMoE-3b")),
+    "GraniteMoeSharedForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm-research/moe-7b-1b-active-shared-experts")),  # noqa: E501
+    "Grok1ModelForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "hpcai-tech/grok-1"),
+                                             trust_remote_code=True),
+    "InternLMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "internlm/internlm-chat-7b"),
+                                           trust_remote_code=True),
+    "InternLM2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "internlm/internlm2-chat-7b"),
+                                            trust_remote_code=True),
+    "InternLM2VEForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "OpenGVLab/Mono-InternVL-2B"),
+                                              trust_remote_code=True),
+    "InternLM3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "internlm/internlm3-8b-instruct"),
+                                            trust_remote_code=True),
+    "JAISLMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "inceptionai/jais-13b-chat")),
+    "JambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ai21labs/AI21-Jamba-1.5-Mini"),
+                                        extras={"tiny": os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-dev")}),  # noqa: E501
+    "LlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "meta-llama/Llama-3.2-1B-Instruct")),
+    "LLaMAForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "decapoda-research/llama-7b-hf"),
                                        is_available_online=False),
-    "MambaForCausalLM": _HfExamplesInfo("state-spaces/mamba-130m-hf"),
-    "Mamba2ForCausalLM": _HfExamplesInfo("mistralai/Mamba-Codestral-7B-v0.1",
+    "MambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "state-spaces/mamba-130m-hf")),
+    "Mamba2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mamba-Codestral-7B-v0.1"),
                                         is_available_online=False),
-    "FalconMambaForCausalLM": _HfExamplesInfo("tiiuae/falcon-mamba-7b-instruct"),  # noqa: E501
-    "MiniCPMForCausalLM": _HfExamplesInfo("openbmb/MiniCPM-2B-sft-bf16",
+    "FalconMambaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-mamba-7b-instruct")),  # noqa: E501
+    "MiniCPMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM-2B-sft-bf16"),
                                         trust_remote_code=True),
-    "MiniCPM3ForCausalLM": _HfExamplesInfo("openbmb/MiniCPM3-4B",
+    "MiniCPM3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM3-4B"),
                                         trust_remote_code=True),
-    "MiniMaxText01ForCausalLM": _HfExamplesInfo("MiniMaxAI/MiniMax-Text-01",
+    "MiniMaxText01ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "MiniMaxAI/MiniMax-Text-01"),
                                                trust_remote_code=True),
-    "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"),
-    "MixtralForCausalLM": _HfExamplesInfo("mistralai/Mixtral-8x7B-Instruct-v0.1",  # noqa: E501
-                                          {"falcon3": "ehristoforu/Falcon3-MoE-2x7B-Insruct"}),  # noqa: E501
-    "QuantMixtralForCausalLM": _HfExamplesInfo("mistral-community/Mixtral-8x22B-v0.1-AWQ"),  # noqa: E501
-    "MptForCausalLM": _HfExamplesInfo("mpt", is_available_online=False),
-    "MPTForCausalLM": _HfExamplesInfo("mosaicml/mpt-7b"),
-    "NemotronForCausalLM": _HfExamplesInfo("nvidia/Minitron-8B-Base"),
-    "OlmoForCausalLM": _HfExamplesInfo("allenai/OLMo-1B-hf"),
-    "Olmo2ForCausalLM": _HfExamplesInfo("shanearora/OLMo-7B-1124-hf"),
-    "OlmoeForCausalLM": _HfExamplesInfo("allenai/OLMoE-1B-7B-0924-Instruct"),
-    "OPTForCausalLM": _HfExamplesInfo("facebook/opt-125m",
-                                      {"1b": "facebook/opt-iml-max-1.3b"}),
-    "OrionForCausalLM": _HfExamplesInfo("OrionStarAI/Orion-14B-Chat",
+    "MistralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mistral-7B-Instruct-v0.1")),
+    "MixtralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mixtral-8x7B-Instruct-v0.1"),  # noqa: E501
+                                          {"falcon3": os.path.join(models_path_prefix, "ehristoforu/Falcon3-MoE-2x7B-Insruct")}),  # noqa: E501
+    "QuantMixtralForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mistral-community/Mixtral-8x22B-v0.1-AWQ")),  # noqa: E501
+    "MptForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mpt"), is_available_online=False),
+    "MPTForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "mosaicml/mpt-7b")),
+    "NemotronForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/Minitron-8B-Base")),
+    "OlmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMo-1B-hf")),
+    "Olmo2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "shanearora/OLMo-7B-1124-hf")),
+    "OlmoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/OLMoE-1B-7B-0924-Instruct")),
+    "OPTForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/opt-125m"),
+                                      {"1b": os.path.join(models_path_prefix, "facebook/opt-iml-max-1.3b")}),
+    "OrionForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "OrionStarAI/Orion-14B-Chat"),
                                        trust_remote_code=True),
-    "PersimmonForCausalLM": _HfExamplesInfo("adept/persimmon-8b-chat"),
-    "PhiForCausalLM": _HfExamplesInfo("microsoft/phi-2"),
-    "Phi3ForCausalLM": _HfExamplesInfo("microsoft/Phi-3-mini-4k-instruct"),
-    "Phi3SmallForCausalLM": _HfExamplesInfo("microsoft/Phi-3-small-8k-instruct",
+    "PersimmonForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/persimmon-8b-chat")),
+    "PhiForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/phi-2")),
+    "Phi3ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-mini-4k-instruct")),
+    "Phi3SmallForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-small-8k-instruct"),
                                            trust_remote_code=True),
-    "PhiMoEForCausalLM": _HfExamplesInfo("microsoft/Phi-3.5-MoE-instruct",
+    "PhiMoEForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3.5-MoE-instruct"),
                                         trust_remote_code=True),
-    "Plamo2ForCausalLM": _HfExamplesInfo("pfnet/plamo-2-1b",
+    "Plamo2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "pfnet/plamo-2-1b"),
                                        trust_remote_code=True),
-    "QWenLMHeadModel": _HfExamplesInfo("Qwen/Qwen-7B-Chat",
+    "QWenLMHeadModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat"),
                                       trust_remote_code=True),
-    "Qwen2ForCausalLM": _HfExamplesInfo("Qwen/Qwen2-0.5B-Instruct",
-                                        extras={"2.5": "Qwen/Qwen2.5-0.5B-Instruct"}), # noqa: E501
-    "Qwen2MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen1.5-MoE-A2.7B-Chat"),
+    "Qwen2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-0.5B-Instruct"),
+                                        extras={"2.5": os.path.join(models_path_prefix, "Qwen/Qwen2.5-0.5B-Instruct")}), # noqa: E501
+    "Qwen2MoeForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen1.5-MoE-A2.7B-Chat")),
    "Qwen3ForCausalLM": _HfExamplesInfo(
-        "Qwen/Qwen3-8B",
+        os.path.join(models_path_prefix, "Qwen/Qwen3-8B"),
        is_available_online=False,
        min_transformers_version="4.51"
    ),
    "Qwen3MoeForCausalLM": _HfExamplesInfo(
-        "Qwen/Qwen3-MoE-15B-A2B",
+        os.path.join(models_path_prefix, "Qwen/Qwen3-MoE-15B-A2B"),
        is_available_online=False,
        min_transformers_version="4.51"
    ),
-    "RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b",
+    "RWForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "tiiuae/falcon-40b"),
                                     is_available_online=False),
-    "StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b",  # noqa: E501
+    "StableLMEpochForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-zephyr-3b"),  # noqa: E501
                                                is_available_online=False),
-    "StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"),
-    "Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
-    "SolarForCausalLM": _HfExamplesInfo("upstage/solar-pro-preview-instruct"),
-    "TeleChat2ForCausalLM": _HfExamplesInfo("Tele-AI/TeleChat2-3B",
+    "StableLmForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "stabilityai/stablelm-3b-4e1t")),
+    "Starcoder2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "bigcode/starcoder2-3b")),
+    "SolarForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "upstage/solar-pro-preview-instruct")),
+    "TeleChat2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Tele-AI/TeleChat2-3B"),
                                            trust_remote_code=True),
-    "TeleFLMForCausalLM": _HfExamplesInfo("CofeAI/FLM-2-52B-Instruct-2407",
+    "TeleFLMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "CofeAI/FLM-2-52B-Instruct-2407"),
                                            trust_remote_code=True),
-    "XverseForCausalLM": _HfExamplesInfo("xverse/XVERSE-7B-Chat",
+    "XverseForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "xverse/XVERSE-7B-Chat"),
                                         is_available_online=False,
                                         trust_remote_code=True),
-    "Zamba2ForCausalLM": _HfExamplesInfo("Zyphra/Zamba2-7B-instruct"),
+    "Zamba2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "Zyphra/Zamba2-7B-instruct")),
    # [Encoder-decoder]
-    "BartModel": _HfExamplesInfo("facebook/bart-base"),
-    "BartForConditionalGeneration": _HfExamplesInfo("facebook/bart-large-cnn"),
+    "BartModel": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/bart-base")),
+    "BartForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/bart-large-cnn")),
 }

 _EMBEDDING_EXAMPLE_MODELS = {
    # [Text-only]
-    "BertModel": _HfExamplesInfo("BAAI/bge-base-en-v1.5"),
-    "Gemma2Model": _HfExamplesInfo("BAAI/bge-multilingual-gemma2"),
-    "GritLM": _HfExamplesInfo("parasail-ai/GritLM-7B-vllm"),
-    "GteModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-v2.0",
-                                               trust_remote_code=True),
-    "InternLM2ForRewardModel": _HfExamplesInfo("internlm/internlm2-1_8b-reward",
-                                               trust_remote_code=True),
-    "JambaForSequenceClassification": _HfExamplesInfo("ai21labs/Jamba-tiny-reward-dev"),  # noqa: E501
-    "LlamaModel": _HfExamplesInfo("llama", is_available_online=False),
-    "MistralModel": _HfExamplesInfo("intfloat/e5-mistral-7b-instruct"),
-    "NomicBertModel": _HfExamplesInfo("Snowflake/snowflake-arctic-embed-m-long",  # noqa: E501
-                                               trust_remote_code=True),
-    "Qwen2Model": _HfExamplesInfo("ssmits/Qwen2-7B-Instruct-embed-base"),
-    "Qwen2ForRewardModel": _HfExamplesInfo("Qwen/Qwen2.5-Math-RM-72B"),
-    "Qwen2ForProcessRewardModel": _HfExamplesInfo("Qwen/Qwen2.5-Math-PRM-7B"),
-    "Qwen2ForSequenceClassification": _HfExamplesInfo("jason9693/Qwen2.5-1.5B-apeach"),  # noqa: E501
-    "RobertaModel": _HfExamplesInfo("sentence-transformers/stsb-roberta-base-v2"),  # noqa: E501
-    "RobertaForMaskedLM": _HfExamplesInfo("sentence-transformers/all-roberta-large-v1"),  # noqa: E501
-    "XLMRobertaModel": _HfExamplesInfo("intfloat/multilingual-e5-small"),
+    "BertModel": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-base-en-v1.5")),
+    "Gemma2Model": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-multilingual-gemma2")),
+    "GritLM": _HfExamplesInfo(os.path.join(models_path_prefix, "parasail-ai/GritLM-7B-vllm")),
+    "GteModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-v2.0"),
+                                               trust_remote_code=True),
+    "InternLM2ForRewardModel": _HfExamplesInfo(os.path.join(models_path_prefix, "internlm/internlm2-1_8b-reward"),
+                                               trust_remote_code=True),
+    "JambaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "ai21labs/Jamba-tiny-reward-dev")),  # noqa: E501
+    "LlamaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "llama"), is_available_online=False),
+    "MistralModel": _HfExamplesInfo(os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct")),
+    "NomicBertModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Snowflake/snowflake-arctic-embed-m-long"),  # noqa: E501
+                                               trust_remote_code=True),
+    "Qwen2Model": _HfExamplesInfo(os.path.join(models_path_prefix, "ssmits/Qwen2-7B-Instruct-embed-base")),
+    "Qwen2ForRewardModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-RM-72B")),
+    "Qwen2ForProcessRewardModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Math-PRM-7B")),
+    "Qwen2ForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "jason9693/Qwen2.5-1.5B-apeach")),  # noqa: E501
+    "RobertaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "sentence-transformers/stsb-roberta-base-v2")),  # noqa: E501
+    "RobertaForMaskedLM": _HfExamplesInfo(os.path.join(models_path_prefix, "sentence-transformers/all-roberta-large-v1")),  # noqa: E501
+    "XLMRobertaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "intfloat/multilingual-e5-small")),
    # [Multimodal]
-    "LlavaNextForConditionalGeneration": _HfExamplesInfo("royokong/e5-v"),
-    "Phi3VForCausalLM": _HfExamplesInfo("TIGER-Lab/VLM2Vec-Full",
+    "LlavaNextForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "royokong/e5-v")),
+    "Phi3VForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "TIGER-Lab/VLM2Vec-Full"),
                                         trust_remote_code=True),
-    "Qwen2VLForConditionalGeneration": _HfExamplesInfo("MrLight/dse-qwen2-2b-mrl-v1"), # noqa: E501
+    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "MrLight/dse-qwen2-2b-mrl-v1")), # noqa: E501
    # The model on Huggingface is currently being updated,
    # hence I temporarily mark it as not available online
-    "PrithviGeoSpatialMAE": _HfExamplesInfo("ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11",  # noqa: E501
+    "PrithviGeoSpatialMAE": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11"),  # noqa: E501
                                            is_available_online=False),
 }

 _CROSS_ENCODER_EXAMPLE_MODELS = {
    # [Text-only]
-    "BertForSequenceClassification": _HfExamplesInfo("cross-encoder/ms-marco-MiniLM-L-6-v2"),  # noqa: E501
-    "RobertaForSequenceClassification": _HfExamplesInfo("cross-encoder/quora-roberta-base"),  # noqa: E501
-    "XLMRobertaForSequenceClassification": _HfExamplesInfo("BAAI/bge-reranker-v2-m3"),  # noqa: E501
-    "ModernBertForSequenceClassification": _HfExamplesInfo("Alibaba-NLP/gte-reranker-modernbert-base"),  # noqa: E501
+    "BertForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "cross-encoder/ms-marco-MiniLM-L-6-v2")),  # noqa: E501
+    "RobertaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "cross-encoder/quora-roberta-base")),  # noqa: E501
+    "XLMRobertaForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "BAAI/bge-reranker-v2-m3")),  # noqa: E501
+    "ModernBertForSequenceClassification": _HfExamplesInfo(os.path.join(models_path_prefix, "Alibaba-NLP/gte-reranker-modernbert-base")),  # noqa: E501
 }

 _MULTIMODAL_EXAMPLE_MODELS = {
    # [Decoder-only]
-    "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"),
-    "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereForAI/aya-vision-8b"), # noqa: E501
-    "Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b",  # noqa: E501
-                                                     extras={"6b": "Salesforce/blip2-opt-6.7b"}),  # noqa: E501
-    "ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"),  # noqa: E501
-    "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny",  # noqa: E501
-                                                extras={"fork": "Isotr0py/deepseek-vl2-tiny"},  # noqa: E501
+    "AriaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "rhymes-ai/Aria")),
+    "AyaVisionForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "CohereForAI/aya-vision-8b")), # noqa: E501
+    "Blip2ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Salesforce/blip2-opt-2.7b"),  # noqa: E501
+                                                     extras={"6b": os.path.join(models_path_prefix, "Salesforce/blip2-opt-6.7b")}),  # noqa: E501
+    "ChameleonForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "facebook/chameleon-7b")),  # noqa: E501
+    "DeepseekVLV2ForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "deepseek-ai/deepseek-vl2-tiny"),  # noqa: E501
+                                                extras={"fork": os.path.join(models_path_prefix, "Isotr0py/deepseek-vl2-tiny")},  # noqa: E501
                                                max_transformers_version="4.48",  # noqa: E501
                                                transformers_version_reason="HF model is not compatible.",  # noqa: E501
                                                hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}),  # noqa: E501
-    "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
-    "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"),
-    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo("ibm-granite/granite-speech-3.3-8b",  # noqa: E501
+    "FuyuForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "adept/fuyu-8b")),
+    "Gemma3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/gemma-3-4b-it")),
+    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "ibm-granite/granite-speech-3.3-8b"),  # noqa: E501
                                                             min_transformers_version="4.52.0"),  # noqa: E501
-    "GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b",
+    "GLM4VForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "THUDM/glm-4v-9b"),
                                        trust_remote_code=True,
                                        hf_overrides={"architectures": ["GLM4VForCausalLM"]}),  # noqa: E501
-    "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
-                                      extras={"2b": "h2oai/h2ovl-mississippi-2b"},  # noqa: E501
+    "H2OVLChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-800m"),
+                                      extras={"2b": os.path.join(models_path_prefix, "h2oai/h2ovl-mississippi-2b")},  # noqa: E501
                                      max_transformers_version="4.48",  # noqa: E501
                                      transformers_version_reason="HF model is not compatible."),  # noqa: E501
-    "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
-                                         extras={"2B": "OpenGVLab/InternVL2-2B"},  # noqa: E501
+    "InternVLChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
+                                         extras={"2B": os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B")},  # noqa: E501
                                         trust_remote_code=True),
-    "Idefics3ForConditionalGeneration": _HfExamplesInfo("HuggingFaceM4/Idefics3-8B-Llama3",  # noqa: E501
-                                                        {"tiny": "HuggingFaceTB/SmolVLM-256M-Instruct"}),  # noqa: E501
-    "KimiVLForConditionalGeneration": _HfExamplesInfo("moonshotai/Kimi-VL-A3B-Instruct",  # noqa: E501
-                                                      extras={"thinking": "moonshotai/Kimi-VL-A3B-Thinking"},  # noqa: E501
+    "Idefics3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceM4/Idefics3-8B-Llama3"),  # noqa: E501
+                                                        {"tiny": os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM-256M-Instruct")}),  # noqa: E501
+    "KimiVLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Instruct"),  # noqa: E501
+                                                      extras={"thinking": os.path.join(models_path_prefix, "moonshotai/Kimi-VL-A3B-Thinking")},  # noqa: E501
                                                      trust_remote_code=True),
-    "Llama4ForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct",   # noqa: E501
+    "Llama4ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct"),   # noqa: E501
                                                      min_transformers_version="4.51"),
-    "LlavaForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-1.5-7b-hf",
-                                                     extras={"mistral": "mistral-community/pixtral-12b", # noqa: E501
-                                                             "mistral-fp8": "nm-testing/pixtral-12b-FP8-dynamic"}),  # noqa: E501
-    "LlavaNextForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-v1.6-mistral-7b-hf"),  # noqa: E501
-    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo("llava-hf/LLaVA-NeXT-Video-7B-hf"),  # noqa: E501
-    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"),  # noqa: E501
-    "MantisForConditionalGeneration": _HfExamplesInfo("TIGER-Lab/Mantis-8B-siglip-llama3",  # noqa: E501
+    "LlavaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
+                                                     extras={"mistral": os.path.join(models_path_prefix, "mistral-community/pixtral-12b"), # noqa: E501
+                                                             "mistral-fp8": os.path.join(models_path_prefix, "nm-testing/pixtral-12b-FP8-dynamic")}),  # noqa: E501
+    "LlavaNextForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")),  # noqa: E501
+    "LlavaNextVideoForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")),  # noqa: E501
+    "LlavaOnevisionForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")),  # noqa: E501
+    "MantisForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "TIGER-Lab/Mantis-8B-siglip-llama3"),  # noqa: E501
                                                      max_transformers_version="4.48",  # noqa: E501
                                                      transformers_version_reason="HF model is not compatible.",  # noqa: E501
                                                      hf_overrides={"architectures": ["MantisForConditionalGeneration"]}),  # noqa: E501
-    "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6",
+    "MiniCPMO": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM-o-2_6"),
                                max_transformers_version="4.48",
                                transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
                                trust_remote_code=True),
-    "MiniCPMV": _HfExamplesInfo("openbmb/MiniCPM-Llama3-V-2_5",
-                                extras={"2.6": "openbmb/MiniCPM-V-2_6"},  # noqa: E501
+    "MiniCPMV": _HfExamplesInfo(os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5"),
+                                extras={"2.6": os.path.join(models_path_prefix, "openbmb/MiniCPM-V-2_6")},  # noqa: E501
                                trust_remote_code=True),
-    "Mistral3ForConditionalGeneration": _HfExamplesInfo("mistralai/Mistral-Small-3.1-24B-Instruct-2503",  # noqa: E501
-                                                        extras={"fp8": "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic"}),  # noqa: E501
-    "MolmoForCausalLM": _HfExamplesInfo("allenai/Molmo-7B-D-0924",
+    "Mistral3ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Mistral-Small-3.1-24B-Instruct-2503"),  # noqa: E501
+                                                        extras={"fp8": os.path.join(models_path_prefix, "nm-testing/Mistral-Small-3.1-24B-Instruct-2503-FP8-dynamic")}),  # noqa: E501
+    "MolmoForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "allenai/Molmo-7B-D-0924"),
                                        max_transformers_version="4.48",
                                        transformers_version_reason="Incorrectly-detected `tensorflow` import.",  # noqa: E501
-                                        extras={"olmo": "allenai/Molmo-7B-O-0924"},  # noqa: E501
+                                        extras={"olmo": os.path.join(models_path_prefix, "allenai/Molmo-7B-O-0924")},  # noqa: E501
                                        trust_remote_code=True),
-    "NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B",
+    "NVLM_D": _HfExamplesInfo(os.path.join(models_path_prefix, "nvidia/NVLM-D-72B"),
                              trust_remote_code=True),
-    "PaliGemmaForConditionalGeneration": _HfExamplesInfo("google/paligemma-3b-mix-224",  # noqa: E501
-                                                         extras={"v2": "google/paligemma2-3b-ft-docci-448"}),  # noqa: E501
-    "Phi3VForCausalLM": _HfExamplesInfo("microsoft/Phi-3-vision-128k-instruct",
+    "PaliGemmaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "google/paligemma-3b-mix-224"),  # noqa: E501
+                                                         extras={"v2": os.path.join(models_path_prefix, "google/paligemma2-3b-ft-docci-448")}),  # noqa: E501
+    "Phi3VForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-3-vision-128k-instruct"),
                                        trust_remote_code=True,
                                        max_transformers_version="4.48",
                                        transformers_version_reason="Use of deprecated imports which have been removed.",  # noqa: E501
-                                        extras={"phi3.5": "microsoft/Phi-3.5-vision-instruct"}),  # noqa: E501
-    "Phi4MMForCausalLM": _HfExamplesInfo("microsoft/Phi-4-multimodal-instruct",
+                                        extras={"phi3.5": os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct"),}),  # noqa: E501
+    "Phi4MMForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct"),
                                        trust_remote_code=True),
-    "PixtralForConditionalGeneration": _HfExamplesInfo("mistralai/Pixtral-12B-2409",  # noqa: E501
+    "PixtralForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409"),  # noqa: E501
                                                       tokenizer_mode="mistral"),
-    "QwenVLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen-VL",
-                                                      extras={"chat": "Qwen/Qwen-VL-Chat"},  # noqa: E501
+    "QwenVLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen-VL"),
+                                                      extras={"chat": os.path.join(models_path_prefix, "Qwen/Qwen-VL-Chat")},  # noqa: E501
                                                      trust_remote_code=True,
                                                      hf_overrides={"architectures": ["QwenVLForConditionalGeneration"]}),  # noqa: E501
-    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"),  # noqa: E501
-    "Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"),  # noqa: E501
-    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct"),  # noqa: E501
-    "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B",  # noqa: E501
+    "Qwen2AudioForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-Audio-7B-Instruct")),  # noqa: E501
+    "Qwen2VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")),  # noqa: E501
+    "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-VL-3B-Instruct")),  # noqa: E501
+    "Qwen2_5OmniModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Qwen/Qwen2.5-Omni-7B"),  # noqa: E501
                                                                  min_transformers_version="4.52"),  # noqa: E501
-    "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B"),
-    "SmolVLMForConditionalGeneration": _HfExamplesInfo("HuggingFaceTB/SmolVLM2-2.2B-Instruct"),  # noqa: E501
-    "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b",  # noqa: E501
+    "SkyworkR1VChatModel": _HfExamplesInfo(os.path.join(models_path_prefix, "Skywork/Skywork-R1V-38B")),
+    "SmolVLMForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")),  # noqa: E501
+    "UltravoxModel": _HfExamplesInfo(os.path.join(models_path_prefix, "fixie-ai/ultravox-v0_5-llama-3_2-1b"),  # noqa: E501
                                     trust_remote_code=True),
    # [Encoder-decoder]
    # Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
    # Therefore, we borrow the BartTokenizer from the original Bart model
-    "Florence2ForConditionalGeneration": _HfExamplesInfo("microsoft/Florence-2-base",  # noqa: E501
-                                                         tokenizer="Isotr0py/Florence-2-tokenizer",
+    "Florence2ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "microsoft/Florence-2-base"),  # noqa: E501
+                                                         tokenizer=os.path.join(models_path_prefix, "Isotr0py/Florence-2-tokenizer"),
                                                         trust_remote_code=True),  # noqa: E501
-    "MllamaForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-3.2-11B-Vision-Instruct"),  # noqa: E501
-    "Llama4ForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct"),  # noqa: E501
-    "WhisperForConditionalGeneration": _HfExamplesInfo("openai/whisper-large-v3"),  # noqa: E501
+    "MllamaForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "meta-llama/Llama-3.2-11B-Vision-Instruct")),  # noqa: E501
+    "Llama4ForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "meta-llama/Llama-4-Scout-17B-16E-Instruct")),  # noqa: E501
+    "WhisperForConditionalGeneration": _HfExamplesInfo(os.path.join(models_path_prefix, "openai/whisper-large-v3")),  # noqa: E501
 }

 _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
-    "EAGLEModel": _HfExamplesInfo("JackFram/llama-68m",
-                                  speculative_model="abhigoyal/vllm-eagle-llama-68m-random"),  # noqa: E501
-    "MedusaModel": _HfExamplesInfo("JackFram/llama-68m",
-                                   speculative_model="abhigoyal/vllm-medusa-llama-68m-random"),  # noqa: E501
-    "MLPSpeculatorPreTrainedModel": _HfExamplesInfo("JackFram/llama-160m",
-                                                    speculative_model="ibm-ai-platform/llama-160m-accelerator"),  # noqa: E501
-    "DeepSeekMTPModel": _HfExamplesInfo("luccafong/deepseek_mtp_main_random",
-                                        speculative_model="luccafong/deepseek_mtp_draft_random",  # noqa: E501
-                                        trust_remote_code=True),
-    "EagleLlamaForCausalLM": _HfExamplesInfo("yuhuili/EAGLE-LLaMA3-Instruct-8B",
+    "EAGLEModel": _HfExamplesInfo(os.path.join(models_path_prefix, "JackFram/llama-68m"),
+                                  speculative_model=os.path.join(models_path_prefix, "abhigoyal/vllm-eagle-llama-68m-random")),  # noqa: E501
+    "MedusaModel": _HfExamplesInfo(os.path.join(models_path_prefix, "JackFram/llama-68m"),
+                                   speculative_model=os.path.join(models_path_prefix, "abhigoyal/vllm-medusa-llama-68m-random")),  # noqa: E501
+    "MLPSpeculatorPreTrainedModel": _HfExamplesInfo(os.path.join(models_path_prefix, "JackFram/llama-160m"),
+                                                    speculative_model=os.path.join(models_path_prefix, "ibm-ai-platform/llama-160m-accelerator")),  # noqa: E501
+    "DeepSeekMTPModel": _HfExamplesInfo(os.path.join(models_path_prefix, "luccafong/deepseek_mtp_main_random"),
+                                        speculative_model=os.path.join(models_path_prefix, "luccafong/deepseek_mtp_draft_random"),  # noqa: E501
+                                        trust_remote_code=True),
+    "EagleLlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"),
                                             trust_remote_code=True,
-                                             speculative_model="yuhuili/EAGLE-LLaMA3-Instruct-8B",
-                                             tokenizer="meta-llama/Meta-Llama-3-8B-Instruct"),  # noqa: E501
-    "Eagle3LlamaForCausalLM": _HfExamplesInfo("yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",  # noqa: E501
+                                             speculative_model=os.path.join(models_path_prefix, "yuhuili/EAGLE-LLaMA3-Instruct-8B"),
+                                             tokenizer=os.path.join(models_path_prefix, "meta-llama/Meta-Llama-3-8B-Instruct")),  # noqa: E501
+    "Eagle3LlamaForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"),  # noqa: E501
                                            trust_remote_code=True,
-                                            speculative_model="yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",
-                                            tokenizer="meta-llama/Llama-3.1-8B-Instruct"),
+                                            speculative_model=os.path.join(models_path_prefix, "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B"),
+                                            tokenizer=os.path.join(models_path_prefix, "meta-llama/Llama-3.1-8B-Instruct")),
 }

 _TRANSFORMERS_MODELS = {
-    "TransformersForCausalLM": _HfExamplesInfo("ArthurZ/Ilama-3.2-1B", trust_remote_code=True),  # noqa: E501
+    "TransformersForCausalLM": _HfExamplesInfo(os.path.join(models_path_prefix, "ArthurZ/Ilama-3.2-1B"), trust_remote_code=True),  # noqa: E501
 }

 _EXAMPLE_MODELS = {

--- a/tests/models/test_gptq_bitblas.py
+++ b/tests/models/test_gptq_bitblas.py
@@ -13,9 +13,11 @@ Run `pytest tests/models/test_bitblas.py`.
 """
 from dataclasses import dataclass

+import os
 import pytest

 from .utils import check_logprobs_close
+from ..utils import models_path_prefix


 @dataclass
@@ -24,7 +26,7 @@ class ModelPair:


 model_pairs = [
-    ModelPair(model_gptq="hxbgsyxh/opt-125m-4bit-128g"),
+    ModelPair(model_gptq=os.path.join(models_path_prefix, "hxbgsyxh/opt-125m-4bit-128g")),
 ]