Commit 3c9817d2 authored by zhuwenwen
Browse files

add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from local path...

Add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from a local path instead of the Hugging Face Hub.
parent 49204f68
from typing import Optional
import os
import pytest
import torch
import torch.nn as nn
......@@ -7,14 +8,15 @@ from huggingface_hub import snapshot_download
from transformers import AutoConfig, AutoModel, CLIPImageProcessor
from ....conftest import _ImageAssets, cleanup
from ....utils import models_path_prefix
# we use snapshot_download to prevent conflicts between
# dynamic_module and trust_remote_code for hf_runner
DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"]
models = [
snapshot_download("OpenGVLab/InternViT-300M-448px",
snapshot_download(os.path.join(models_path_prefix, "OpenGVLab/InternViT-300M-448px"),
allow_patterns=DOWNLOAD_PATTERN),
snapshot_download("OpenGVLab/InternViT-6B-448px-V1-5",
snapshot_download(os.path.join(models_path_prefix, "OpenGVLab/InternViT-6B-448px-V1-5"),
allow_patterns=DOWNLOAD_PATTERN),
]
......
import types
from typing import List, Optional, Tuple, Type, Union
import os
import pytest
import torch
from PIL.Image import Image
......@@ -12,6 +13,7 @@ from vllm.utils import is_cpu
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -22,8 +24,8 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
HF_MULTIIMAGE_IMAGE_PROMPT = "<|im_start|>User\nImage-1: <image>\nImage-2: <image>\nDescribe the two images in detail.<|im_end|>\n<|im_start|>Assistant\n" # noqa: E501
models = [
"OpenGVLab/InternVL2-1B",
"OpenGVLab/InternVL2-2B",
os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
# Broken due to outdated implementation of Phi-3
# See: https://huggingface.co/OpenGVLab/InternVL2-4B/discussions/3
# "OpenGVLab/InternVL2-4B",
......@@ -365,7 +367,7 @@ def test_different_num_patches(hf_runner, vllm_runner, image_assets, model,
@pytest.mark.parametrize(
"models", [("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")])
"models", [(os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"), os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B-AWQ"))])
@pytest.mark.parametrize(
"size_factors",
[
......
from typing import List, Optional, Tuple, Type, overload
import os
import pytest
from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
BatchEncoding)
......@@ -11,6 +12,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
_LIMIT_IMAGE_PER_PROMPT = 4
......@@ -22,7 +24,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
})
models = [
"llava-hf/llava-1.5-7b-hf",
os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
# TODO: Get this model to produce meaningful output in vLLM
# "TIGER-Lab/Mantis-8B-siglip-llama3",
]
......
from typing import List, Optional, Tuple, Type
import os
import pytest
from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
......@@ -7,6 +8,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -16,7 +18,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
})
models = [
"llava-hf/llava-1.5-7b-hf",
os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
]
......
from typing import List, Optional, Tuple, Type, overload
import os
import pytest
from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
......@@ -9,6 +10,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
_LIMIT_IMAGE_PER_PROMPT = 4
......@@ -19,7 +21,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"[INST] <image>\nWhat is the season? [/INST]",
})
models = ["llava-hf/llava-v1.6-mistral-7b-hf"]
models = [os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
......
from typing import List, Optional, Tuple, Type, overload
import os
import pytest
import transformers
from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
......@@ -10,6 +11,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
_PREFACE = (
"A chat between a curious human and an artificial intelligence assistant. "
......@@ -21,7 +23,7 @@ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
f"{_PREFACE}USER: <video>\nWhy is this video funny? ASSISTANT:"
})
models = ["llava-hf/LLaVA-NeXT-Video-7B-hf"]
models = [os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
......
from typing import List, Optional, Tuple, Type, overload
import os
import pytest
import transformers
from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
......@@ -13,6 +14,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ....conftest import (VIDEO_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_VideoAssets)
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
# Video test
HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
......@@ -21,7 +23,7 @@ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
<|im_end|><|im_start|>assistant\n"
})
models = ["llava-hf/llava-onevision-qwen2-7b-ov-hf"]
models = [os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-7b-ov-hf")]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
......
from typing import List, Optional, Tuple, Type, Union
import os
import pytest
import torch
import torch.types
......@@ -11,6 +12,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
# The image token is placed before "user" on purpose so that the test can pass
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
......@@ -29,7 +31,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = \
"Describe these images.<|eot_id|>" \
"<|start_header_id|>assistant<|end_header_id|>\n\n"
models = ["openbmb/MiniCPM-Llama3-V-2_5"]
models = [os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5")]
def _wrap_inputs(hf_inputs: BatchEncoding) -> BatchEncoding:
......
......@@ -10,6 +10,7 @@ from vllm.utils import is_hip
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -18,7 +19,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"What is in the picture?",
})
models = ["google/paligemma-3b-mix-224"]
models = [os.path.join(models_path_prefix, "google/paligemma-3b-mix-224")]
# ROCm Triton FA can run into compilation issues with these models due to,
# excessive use of shared memory. Use other backends in the meantime.
......
......@@ -16,6 +16,7 @@ from vllm.utils import is_cpu, is_hip
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from ...utils import build_model_context, check_logprobs_close
from ....utils import models_path_prefix
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -25,7 +26,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
})
HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these images.<|end|>\n<|assistant|>\n" # noqa: E501
models = ["microsoft/Phi-3.5-vision-instruct"]
models = [os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
......
......@@ -7,6 +7,7 @@ import uuid
from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
import os
import pytest
from mistral_common.protocol.instruct.messages import ImageURLChunk
from mistral_common.protocol.instruct.request import ChatCompletionRequest
......@@ -19,11 +20,12 @@ from vllm.sequence import Logprob, SampleLogprobs
from ....utils import VLLM_PATH
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
if TYPE_CHECKING:
from _typeshed import StrPath
MODELS = ["mistralai/Pixtral-12B-2409"]
MODELS = [os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409")]
IMG_URLS = [
"https://picsum.photos/id/237/400/300",
"https://picsum.photos/id/231/200/300",
......
import pathlib
from typing import Dict, List, Optional, Tuple, Type, Union
import os
import pytest
import torch
from PIL.Image import Image
......@@ -12,12 +13,13 @@ from vllm.multimodal.utils import cached_get_tokenizer, rescale_image_size
from ....conftest import (IMAGE_ASSETS, HfRunner, ImageAsset, PromptImageInput,
VllmRunner, _ImageAssets)
from ...utils import build_model_context, check_logprobs_close
from ....utils import models_path_prefix
text_only_models = [
"Qwen/Qwen-7B-Chat" # Has no visual component
os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat") # Has no visual component
]
multimodal_models = ["Qwen/Qwen-VL"]
multimodal_models = [os.path.join(models_path_prefix, "Qwen/Qwen-VL")]
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......
......@@ -2,12 +2,14 @@
Run `pytest tests/models/test_llama_embedding.py`.
"""
import os
import pytest
import torch
import torch.nn.functional as F
from ....utils import models_path_prefix
MODELS = [
"intfloat/e5-mistral-7b-instruct",
os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"),
]
......
......@@ -2,9 +2,11 @@
Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
"""
import os
from typing import List, Optional, Tuple, Type
from vllm.utils import is_cpu
from ....utils import models_path_prefix
if not is_cpu():
# CPU backend is not currently supported with encoder/decoder models
......@@ -21,7 +23,7 @@ if not is_cpu():
from ....utils import multi_gpu_test
from ...utils import check_logprobs_close
MODELS = ["facebook/bart-base", "facebook/bart-large-cnn"]
MODELS = [os.path.join(models_path_prefix, "facebook/bart-base"), os.path.join(models_path_prefix, "facebook/bart-large-cnn")]
def vllm_to_hf_output(
vllm_output: Tuple[List[int], str, Optional[SampleLogprobs]],
......
from typing import List, Optional, Tuple, Type, overload
import os
import pytest
from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
BatchEncoding)
......@@ -11,6 +12,7 @@ from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from ....utils import multi_gpu_test
from ...utils import check_logprobs_close
from ....utils import models_path_prefix
_LIMIT_IMAGE_PER_PROMPT = 1
......@@ -26,7 +28,7 @@ text_only_prompts = [
]
models = [
"meta-llama/Llama-3.2-11B-Vision-Instruct",
os.path.join(models_path_prefix, "meta-llama/Llama-3.2-11B-Vision-Instruct"),
]
......
......@@ -5,11 +5,13 @@ import tempfile
import uuid
import pytest
import os
from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate
from vllm.engine.arg_utils import AsyncEngineArgs
from ..utils import models_path_prefix
MODEL = "google/gemma-1.1-2b-it"
MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
RAISED_ERROR = KeyError
RAISED_VALUE = "foo"
......
......@@ -7,6 +7,7 @@ import uuid
from unittest.mock import Mock
import pytest
import os
from tests.mq_llm_engine.utils import RemoteMQLLMEngine
from vllm import SamplingParams
......@@ -19,8 +20,9 @@ from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.lora.request import LoRARequest
from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser
from ..utils import models_path_prefix
MODEL = "google/gemma-1.1-2b-it"
MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
RAISED_ERROR = KeyError
RAISED_VALUE = "foo"
......
......@@ -5,11 +5,13 @@ import tempfile
import uuid
import pytest
import os
from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate
from vllm.engine.arg_utils import AsyncEngineArgs
from ..utils import models_path_prefix
MODEL = "google/gemma-1.1-2b-it"
MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
NUM_EXPECTED_TOKENS = 10
NUM_REQUESTS = 10000
......
......@@ -2,15 +2,16 @@
from typing import List, Optional
import pytest
import os
from tests.kernels.utils import override_backend_env_variable
from ..models.utils import check_logprobs_close
from ..utils import (completions_with_server_args, get_client_text_generations,
get_client_text_logprob_generations)
get_client_text_logprob_generations, models_path_prefix)
MODELS = [
"JackFram/llama-160m",
os.path.join(models_path_prefix, "JackFram/llama-160m"),
]
NUM_SCHEDULER_STEPS = [8] # Multi-step decoding steps
NUM_PROMPTS = [10]
......
......@@ -3,11 +3,13 @@
from typing import Optional
import pytest
import os
from ..models.utils import check_logprobs_close, check_outputs_equal
from ..utils import models_path_prefix
MODELS = [
"JackFram/llama-160m",
os.path.join(models_path_prefix, "JackFram/llama-160m"),
]
NUM_SCHEDULER_STEPS = [8] # Multi-step decoding steps
NUM_PROMPTS = [10]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment