add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from local path...

Add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from a local path instead of the Hugging Face Hub

add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from local path...
Add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from a local path instead of the Hugging Face Hub
3c9817d2 · zhuwenwen · 49204f68 · 3c9817d2 · 3c9817d2 · 3c9817d2
Commit 3c9817d2 authored Nov 27, 2024 by zhuwenwen
20 changed files
--- a/tests/models/decoder_only/vision_language/test_intern_vit.py
+++ b/tests/models/decoder_only/vision_language/test_intern_vit.py
 from typing import Optional
+import os
 import pytest
 import torch
 import torch.nn as nn
@@ -7,14 +8,15 @@ from huggingface_hub import snapshot_download
 from transformers import AutoConfig, AutoModel, CLIPImageProcessor
 from ....conftest import _ImageAssets, cleanup
+from ....utils import models_path_prefix
 # we use snapshot_download to prevent conflicts between
 # dynamic_module and trust_remote_code for hf_runner
 DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"]
 models = [
-    snapshot_download("OpenGVLab/InternViT-300M-448px",
+    snapshot_download(os.path.join(models_path_prefix, "OpenGVLab/InternViT-300M-448px"),
                      allow_patterns=DOWNLOAD_PATTERN),
-    snapshot_download("OpenGVLab/InternViT-6B-448px-V1-5",
+    snapshot_download(os.path.join(models_path_prefix, "OpenGVLab/InternViT-6B-448px-V1-5"),
                      allow_patterns=DOWNLOAD_PATTERN),
 ]

--- a/tests/models/decoder_only/vision_language/test_internvl.py
+++ b/tests/models/decoder_only/vision_language/test_internvl.py
 import types
 from typing import List, Optional, Tuple, Type, Union
+import os
 import pytest
 import torch
 from PIL.Image import Image
@@ -12,6 +13,7 @@ from vllm.utils import is_cpu
 from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
                          _ImageAssets)
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "stop_sign":
@@ -22,8 +24,8 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
 HF_MULTIIMAGE_IMAGE_PROMPT = "<|im_start|>User\nImage-1: <image>\nImage-2: <image>\nDescribe the two images in detail.<|im_end|>\n<|im_start|>Assistant\n"  # noqa: E501
 models = [
-    "OpenGVLab/InternVL2-1B",
+    os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
-    "OpenGVLab/InternVL2-2B",
+    os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
    # Broken due to outdated implementation of Phi-3
    # See: https://huggingface.co/OpenGVLab/InternVL2-4B/discussions/3
    # "OpenGVLab/InternVL2-4B",
@@ -365,7 +367,7 @@ def test_different_num_patches(hf_runner, vllm_runner, image_assets, model,
 @pytest.mark.parametrize(
-    "models", [("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")])
+    "models", [(os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"), os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B-AWQ"))])
 @pytest.mark.parametrize(
    "size_factors",
    [

--- a/tests/models/decoder_only/vision_language/test_llava.py
+++ b/tests/models/decoder_only/vision_language/test_llava.py
 from typing import List, Optional, Tuple, Type, overload
+import os
 import pytest
 from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
                          BatchEncoding)
@@ -11,6 +12,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
 from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
                          _ImageAssets)
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 _LIMIT_IMAGE_PER_PROMPT = 4
@@ -22,7 +24,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
 })
 models = [
-    "llava-hf/llava-1.5-7b-hf",
+    os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
    # TODO: Get this model to produce meaningful output in vLLM
    # "TIGER-Lab/Mantis-8B-siglip-llama3",
 ]

--- a/tests/models/decoder_only/vision_language/test_llava_image_embeds.py
+++ b/tests/models/decoder_only/vision_language/test_llava_image_embeds.py
 from typing import List, Optional, Tuple, Type
+import os
 import pytest
 from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
@@ -7,6 +8,7 @@ from vllm.sequence import SampleLogprobs
 from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "stop_sign":
@@ -16,7 +18,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
 })
 models = [
-    "llava-hf/llava-1.5-7b-hf",
+    os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
 ]

--- a/tests/models/decoder_only/vision_language/test_llava_next.py
+++ b/tests/models/decoder_only/vision_language/test_llava_next.py
 from typing import List, Optional, Tuple, Type, overload
+import os
 import pytest
 from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
@@ -9,6 +10,7 @@ from vllm.sequence import SampleLogprobs
 from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
                          _ImageAssets)
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 _LIMIT_IMAGE_PER_PROMPT = 4
@@ -19,7 +21,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "[INST] <image>\nWhat is the season? [/INST]",
 })
-models = ["llava-hf/llava-v1.6-mistral-7b-hf"]
+models = [os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")]
 def vllm_to_hf_output(vllm_output: Tuple[List[int], str,

--- a/tests/models/decoder_only/vision_language/test_llava_next_video.py
+++ b/tests/models/decoder_only/vision_language/test_llava_next_video.py
 from typing import List, Optional, Tuple, Type, overload
+import os
 import pytest
 import transformers
 from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
@@ -10,6 +11,7 @@ from vllm.sequence import SampleLogprobs
 from ....conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 _PREFACE = (
    "A chat between a curious human and an artificial intelligence assistant. "
@@ -21,7 +23,7 @@ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
    f"{_PREFACE}USER: <video>\nWhy is this video funny? ASSISTANT:"
 })
-models = ["llava-hf/LLaVA-NeXT-Video-7B-hf"]
+models = [os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")]
 def vllm_to_hf_output(vllm_output: Tuple[List[int], str,

--- a/tests/models/decoder_only/vision_language/test_llava_onevision.py
+++ b/tests/models/decoder_only/vision_language/test_llava_onevision.py
 from typing import List, Optional, Tuple, Type, overload
+import os
 import pytest
 import transformers
 from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
@@ -13,6 +14,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
 from ....conftest import (VIDEO_ASSETS, HfRunner, PromptImageInput, VllmRunner,
                          _VideoAssets)
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 # Video test
 HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
@@ -21,7 +23,7 @@ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
    <|im_end|><|im_start|>assistant\n"
 })
-models = ["llava-hf/llava-onevision-qwen2-7b-ov-hf"]
+models = [os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-7b-ov-hf")]
 def vllm_to_hf_output(vllm_output: Tuple[List[int], str,

--- a/tests/models/decoder_only/vision_language/test_minicpmv.py
+++ b/tests/models/decoder_only/vision_language/test_minicpmv.py
 from typing import List, Optional, Tuple, Type, Union
+import os
 import pytest
 import torch
 import torch.types
@@ -11,6 +12,7 @@ from vllm.sequence import SampleLogprobs
 from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 # The image token is placed before "user" on purpose so that the test can pass
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
@@ -29,7 +31,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = \
    "Describe these images.<|eot_id|>" \
    "<|start_header_id|>assistant<|end_header_id|>\n\n"
-models = ["openbmb/MiniCPM-Llama3-V-2_5"]
+models = [os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5")]
 def _wrap_inputs(hf_inputs: BatchEncoding) -> BatchEncoding:

--- a/tests/models/decoder_only/vision_language/test_paligemma.py
+++ b/tests/models/decoder_only/vision_language/test_paligemma.py
@@ -10,6 +10,7 @@ from vllm.utils import is_hip
 from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "stop_sign":
@@ -18,7 +19,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "What is in the picture?",
 })
-models = ["google/paligemma-3b-mix-224"]
+models = [os.path.join(models_path_prefix, "google/paligemma-3b-mix-224")]
 # ROCm Triton FA can run into compilation issues with these models due to
 # excessive use of shared memory. Use other backends in the meantime.

--- a/tests/models/decoder_only/vision_language/test_phi3v.py
+++ b/tests/models/decoder_only/vision_language/test_phi3v.py
@@ -16,6 +16,7 @@ from vllm.utils import is_cpu, is_hip
 from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
                          _ImageAssets)
 from ...utils import build_model_context, check_logprobs_close
+from ....utils import models_path_prefix
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "stop_sign":
@@ -25,7 +26,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
 })
 HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these images.<|end|>\n<|assistant|>\n"  # noqa: E501
-models = ["microsoft/Phi-3.5-vision-instruct"]
+models = [os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")]
 def vllm_to_hf_output(vllm_output: Tuple[List[int], str,

--- a/tests/models/decoder_only/vision_language/test_pixtral.py
+++ b/tests/models/decoder_only/vision_language/test_pixtral.py
@@ -7,6 +7,7 @@ import uuid
 from dataclasses import asdict
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
+import os
 import pytest
 from mistral_common.protocol.instruct.messages import ImageURLChunk
 from mistral_common.protocol.instruct.request import ChatCompletionRequest
@@ -19,11 +20,12 @@ from vllm.sequence import Logprob, SampleLogprobs
 from ....utils import VLLM_PATH
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 if TYPE_CHECKING:
    from _typeshed import StrPath
-MODELS = ["mistralai/Pixtral-12B-2409"]
+MODELS = [os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409")]
 IMG_URLS = [
    "https://picsum.photos/id/237/400/300",
    "https://picsum.photos/id/231/200/300",

--- a/tests/models/decoder_only/vision_language/test_qwen.py
+++ b/tests/models/decoder_only/vision_language/test_qwen.py
 import pathlib
 from typing import Dict, List, Optional, Tuple, Type, Union
+import os
 import pytest
 import torch
 from PIL.Image import Image
@@ -12,12 +13,13 @@ from vllm.multimodal.utils import cached_get_tokenizer, rescale_image_size
 from ....conftest import (IMAGE_ASSETS, HfRunner, ImageAsset, PromptImageInput,
                          VllmRunner, _ImageAssets)
 from ...utils import build_model_context, check_logprobs_close
+from ....utils import models_path_prefix
 text_only_models = [
-    "Qwen/Qwen-7B-Chat"  # Has no visual component
+    os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat")  # Has no visual component
 ]
-multimodal_models = ["Qwen/Qwen-VL"]
+multimodal_models = [os.path.join(models_path_prefix, "Qwen/Qwen-VL")]
 HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
    "stop_sign":

--- a/tests/models/embedding/language/test_embedding.py
+++ b/tests/models/embedding/language/test_embedding.py
@@ -2,12 +2,14 @@
 Run `pytest tests/models/embedding/language/test_embedding.py`.
 """
+import os
 import pytest
 import torch
 import torch.nn.functional as F
+from ....utils import models_path_prefix
 MODELS = [
-    "intfloat/e5-mistral-7b-instruct",
+    os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"),
 ]

--- a/tests/models/encoder_decoder/language/test_bart.py
+++ b/tests/models/encoder_decoder/language/test_bart.py
@@ -2,9 +2,11 @@
 Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
 """
+import os
 from typing import List, Optional, Tuple, Type
 from vllm.utils import is_cpu
+from ....utils import models_path_prefix
 if not is_cpu():
    # CPU backend is not currently supported with encoder/decoder models
@@ -21,7 +23,7 @@ if not is_cpu():
    from ....utils import multi_gpu_test
    from ...utils import check_logprobs_close
-    MODELS = ["facebook/bart-base", "facebook/bart-large-cnn"]
+    MODELS = [os.path.join(models_path_prefix, "facebook/bart-base"), os.path.join(models_path_prefix, "facebook/bart-large-cnn")]
    def vllm_to_hf_output(
        vllm_output: Tuple[List[int], str, Optional[SampleLogprobs]],

--- a/tests/models/encoder_decoder/vision_language/test_mllama.py
+++ b/tests/models/encoder_decoder/vision_language/test_mllama.py
 from typing import List, Optional, Tuple, Type, overload
+import os
 import pytest
 from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
                          BatchEncoding)
@@ -11,6 +12,7 @@ from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
                          _ImageAssets)
 from ....utils import multi_gpu_test
 from ...utils import check_logprobs_close
+from ....utils import models_path_prefix
 _LIMIT_IMAGE_PER_PROMPT = 1
@@ -26,7 +28,7 @@ text_only_prompts = [
 ]
 models = [
-    "meta-llama/Llama-3.2-11B-Vision-Instruct",
+    os.path.join(models_path_prefix, "meta-llama/Llama-3.2-11B-Vision-Instruct"),
 ]

--- a/tests/mq_llm_engine/test_abort.py
+++ b/tests/mq_llm_engine/test_abort.py
@@ -5,11 +5,13 @@ import tempfile
 import uuid
 import pytest
+import os
 from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate
 from vllm.engine.arg_utils import AsyncEngineArgs
+from ..utils import models_path_prefix
-MODEL = "google/gemma-1.1-2b-it"
+MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
 ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
 RAISED_ERROR = KeyError
 RAISED_VALUE = "foo"

--- a/tests/mq_llm_engine/test_error_handling.py
+++ b/tests/mq_llm_engine/test_error_handling.py
@@ -7,6 +7,7 @@ import uuid
 from unittest.mock import Mock
 import pytest
+import os
 from tests.mq_llm_engine.utils import RemoteMQLLMEngine
 from vllm import SamplingParams
@@ -19,8 +20,9 @@ from vllm.entrypoints.openai.cli_args import make_arg_parser
 from vllm.lora.request import LoRARequest
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser
+from ..utils import models_path_prefix
-MODEL = "google/gemma-1.1-2b-it"
+MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
 ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
 RAISED_ERROR = KeyError
 RAISED_VALUE = "foo"

--- a/tests/mq_llm_engine/test_load.py
+++ b/tests/mq_llm_engine/test_load.py
@@ -5,11 +5,13 @@ import tempfile
 import uuid
 import pytest
+import os
 from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate
 from vllm.engine.arg_utils import AsyncEngineArgs
+from ..utils import models_path_prefix
-MODEL = "google/gemma-1.1-2b-it"
+MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
 NUM_EXPECTED_TOKENS = 10
 NUM_REQUESTS = 10000

--- a/tests/multi_step/test_correctness_async_llm.py
+++ b/tests/multi_step/test_correctness_async_llm.py
@@ -2,15 +2,16 @@
 from typing import List, Optional
 import pytest
+import os
 from tests.kernels.utils import override_backend_env_variable
 from ..models.utils import check_logprobs_close
 from ..utils import (completions_with_server_args, get_client_text_generations,
-                     get_client_text_logprob_generations)
+                     get_client_text_logprob_generations, models_path_prefix)
 MODELS = [
-    "JackFram/llama-160m",
+    os.path.join(models_path_prefix, "JackFram/llama-160m"),
 ]
 NUM_SCHEDULER_STEPS = [8]  # Multi-step decoding steps
 NUM_PROMPTS = [10]

--- a/tests/multi_step/test_correctness_llm.py
+++ b/tests/multi_step/test_correctness_llm.py
@@ -3,11 +3,13 @@
 from typing import Optional
 import pytest
+import os
 from ..models.utils import check_logprobs_close, check_outputs_equal
+from ..utils import models_path_prefix
 MODELS = [
-    "JackFram/llama-160m",
+    os.path.join(models_path_prefix, "JackFram/llama-160m"),
 ]
 NUM_SCHEDULER_STEPS = [8]  # Multi-step decoding steps
 NUM_PROMPTS = [10]