Commit 3c9817d2 authored by zhuwenwen
Browse files

add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from local path...

Add VLLM_OPTEST_MODELS_PATH/OPTEST_MODELS_PATH to load models from a local path instead of the Hugging Face Hub
parent 49204f68
from typing import Optional from typing import Optional
import os
import pytest import pytest
import torch import torch
import torch.nn as nn import torch.nn as nn
...@@ -7,14 +8,15 @@ from huggingface_hub import snapshot_download ...@@ -7,14 +8,15 @@ from huggingface_hub import snapshot_download
from transformers import AutoConfig, AutoModel, CLIPImageProcessor from transformers import AutoConfig, AutoModel, CLIPImageProcessor
from ....conftest import _ImageAssets, cleanup from ....conftest import _ImageAssets, cleanup
from ....utils import models_path_prefix
# we use snapshot_download to prevent conflicts between # we use snapshot_download to prevent conflicts between
# dynamic_module and trust_remote_code for hf_runner # dynamic_module and trust_remote_code for hf_runner
DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"] DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"]
models = [ models = [
snapshot_download("OpenGVLab/InternViT-300M-448px", snapshot_download(os.path.join(models_path_prefix, "OpenGVLab/InternViT-300M-448px"),
allow_patterns=DOWNLOAD_PATTERN), allow_patterns=DOWNLOAD_PATTERN),
snapshot_download("OpenGVLab/InternViT-6B-448px-V1-5", snapshot_download(os.path.join(models_path_prefix, "OpenGVLab/InternViT-6B-448px-V1-5"),
allow_patterns=DOWNLOAD_PATTERN), allow_patterns=DOWNLOAD_PATTERN),
] ]
......
import types import types
from typing import List, Optional, Tuple, Type, Union from typing import List, Optional, Tuple, Type, Union
import os
import pytest import pytest
import torch import torch
from PIL.Image import Image from PIL.Image import Image
...@@ -12,6 +13,7 @@ from vllm.utils import is_cpu ...@@ -12,6 +13,7 @@ from vllm.utils import is_cpu
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner, from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets) _ImageAssets)
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -22,8 +24,8 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ ...@@ -22,8 +24,8 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
HF_MULTIIMAGE_IMAGE_PROMPT = "<|im_start|>User\nImage-1: <image>\nImage-2: <image>\nDescribe the two images in detail.<|im_end|>\n<|im_start|>Assistant\n" # noqa: E501 HF_MULTIIMAGE_IMAGE_PROMPT = "<|im_start|>User\nImage-1: <image>\nImage-2: <image>\nDescribe the two images in detail.<|im_end|>\n<|im_start|>Assistant\n" # noqa: E501
models = [ models = [
"OpenGVLab/InternVL2-1B", os.path.join(models_path_prefix, "OpenGVLab/InternVL2-1B"),
"OpenGVLab/InternVL2-2B", os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"),
# Broken due to outdated implementation of Phi-3 # Broken due to outdated implementation of Phi-3
# See: https://huggingface.co/OpenGVLab/InternVL2-4B/discussions/3 # See: https://huggingface.co/OpenGVLab/InternVL2-4B/discussions/3
# "OpenGVLab/InternVL2-4B", # "OpenGVLab/InternVL2-4B",
...@@ -365,7 +367,7 @@ def test_different_num_patches(hf_runner, vllm_runner, image_assets, model, ...@@ -365,7 +367,7 @@ def test_different_num_patches(hf_runner, vllm_runner, image_assets, model,
@pytest.mark.parametrize( @pytest.mark.parametrize(
"models", [("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")]) "models", [(os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B"), os.path.join(models_path_prefix, "OpenGVLab/InternVL2-2B-AWQ"))])
@pytest.mark.parametrize( @pytest.mark.parametrize(
"size_factors", "size_factors",
[ [
......
from typing import List, Optional, Tuple, Type, overload from typing import List, Optional, Tuple, Type, overload
import os
import pytest import pytest
from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer, from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
BatchEncoding) BatchEncoding)
...@@ -11,6 +12,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE ...@@ -11,6 +12,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner, from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets) _ImageAssets)
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
_LIMIT_IMAGE_PER_PROMPT = 4 _LIMIT_IMAGE_PER_PROMPT = 4
...@@ -22,7 +24,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ ...@@ -22,7 +24,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
}) })
models = [ models = [
"llava-hf/llava-1.5-7b-hf", os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
# TODO: Get this model to produce meaningful output in vLLM # TODO: Get this model to produce meaningful output in vLLM
# "TIGER-Lab/Mantis-8B-siglip-llama3", # "TIGER-Lab/Mantis-8B-siglip-llama3",
] ]
......
from typing import List, Optional, Tuple, Type from typing import List, Optional, Tuple, Type
import os
import pytest import pytest
from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
...@@ -7,6 +8,7 @@ from vllm.sequence import SampleLogprobs ...@@ -7,6 +8,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -16,7 +18,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ ...@@ -16,7 +18,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
}) })
models = [ models = [
"llava-hf/llava-1.5-7b-hf", os.path.join(models_path_prefix, "llava-hf/llava-1.5-7b-hf"),
] ]
......
from typing import List, Optional, Tuple, Type, overload from typing import List, Optional, Tuple, Type, overload
import os
import pytest import pytest
from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
...@@ -9,6 +10,7 @@ from vllm.sequence import SampleLogprobs ...@@ -9,6 +10,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner, from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets) _ImageAssets)
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
_LIMIT_IMAGE_PER_PROMPT = 4 _LIMIT_IMAGE_PER_PROMPT = 4
...@@ -19,7 +21,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ ...@@ -19,7 +21,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"[INST] <image>\nWhat is the season? [/INST]", "[INST] <image>\nWhat is the season? [/INST]",
}) })
models = ["llava-hf/llava-v1.6-mistral-7b-hf"] models = [os.path.join(models_path_prefix, "llava-hf/llava-v1.6-mistral-7b-hf")]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str, def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
......
from typing import List, Optional, Tuple, Type, overload from typing import List, Optional, Tuple, Type, overload
import os
import pytest import pytest
import transformers import transformers
from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
...@@ -10,6 +11,7 @@ from vllm.sequence import SampleLogprobs ...@@ -10,6 +11,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets from ....conftest import VIDEO_ASSETS, HfRunner, VllmRunner, _VideoAssets
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
_PREFACE = ( _PREFACE = (
"A chat between a curious human and an artificial intelligence assistant. " "A chat between a curious human and an artificial intelligence assistant. "
...@@ -21,7 +23,7 @@ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({ ...@@ -21,7 +23,7 @@ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
f"{_PREFACE}USER: <video>\nWhy is this video funny? ASSISTANT:" f"{_PREFACE}USER: <video>\nWhy is this video funny? ASSISTANT:"
}) })
models = ["llava-hf/LLaVA-NeXT-Video-7B-hf"] models = [os.path.join(models_path_prefix, "llava-hf/LLaVA-NeXT-Video-7B-hf")]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str, def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
......
from typing import List, Optional, Tuple, Type, overload from typing import List, Optional, Tuple, Type, overload
import os
import pytest import pytest
import transformers import transformers
from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer, from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
...@@ -13,6 +14,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE ...@@ -13,6 +14,7 @@ from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ....conftest import (VIDEO_ASSETS, HfRunner, PromptImageInput, VllmRunner, from ....conftest import (VIDEO_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_VideoAssets) _VideoAssets)
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
# Video test # Video test
HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
...@@ -21,7 +23,7 @@ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({ ...@@ -21,7 +23,7 @@ HF_VIDEO_PROMPTS = VIDEO_ASSETS.prompts({
<|im_end|><|im_start|>assistant\n" <|im_end|><|im_start|>assistant\n"
}) })
models = ["llava-hf/llava-onevision-qwen2-7b-ov-hf"] models = [os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-7b-ov-hf")]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str, def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
......
from typing import List, Optional, Tuple, Type, Union from typing import List, Optional, Tuple, Type, Union
import os
import pytest import pytest
import torch import torch
import torch.types import torch.types
...@@ -11,6 +12,7 @@ from vllm.sequence import SampleLogprobs ...@@ -11,6 +12,7 @@ from vllm.sequence import SampleLogprobs
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
# The image token is placed before "user" on purpose so that the test can pass # The image token is placed before "user" on purpose so that the test can pass
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
...@@ -29,7 +31,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = \ ...@@ -29,7 +31,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = \
"Describe these images.<|eot_id|>" \ "Describe these images.<|eot_id|>" \
"<|start_header_id|>assistant<|end_header_id|>\n\n" "<|start_header_id|>assistant<|end_header_id|>\n\n"
models = ["openbmb/MiniCPM-Llama3-V-2_5"] models = [os.path.join(models_path_prefix, "openbmb/MiniCPM-Llama3-V-2_5")]
def _wrap_inputs(hf_inputs: BatchEncoding) -> BatchEncoding: def _wrap_inputs(hf_inputs: BatchEncoding) -> BatchEncoding:
......
...@@ -10,6 +10,7 @@ from vllm.utils import is_hip ...@@ -10,6 +10,7 @@ from vllm.utils import is_hip
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -18,7 +19,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ ...@@ -18,7 +19,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"What is in the picture?", "What is in the picture?",
}) })
models = ["google/paligemma-3b-mix-224"] models = [os.path.join(models_path_prefix, "google/paligemma-3b-mix-224")]
# ROCm Triton FA can run into compilation issues with these models due to, # ROCm Triton FA can run into compilation issues with these models due to,
# excessive use of shared memory. Use other backends in the meantime. # excessive use of shared memory. Use other backends in the meantime.
......
...@@ -16,6 +16,7 @@ from vllm.utils import is_cpu, is_hip ...@@ -16,6 +16,7 @@ from vllm.utils import is_cpu, is_hip
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner, from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets) _ImageAssets)
from ...utils import build_model_context, check_logprobs_close from ...utils import build_model_context, check_logprobs_close
from ....utils import models_path_prefix
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -25,7 +26,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ ...@@ -25,7 +26,7 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
}) })
HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these images.<|end|>\n<|assistant|>\n" # noqa: E501 HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these images.<|end|>\n<|assistant|>\n" # noqa: E501
models = ["microsoft/Phi-3.5-vision-instruct"] models = [os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str, def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
......
...@@ -7,6 +7,7 @@ import uuid ...@@ -7,6 +7,7 @@ import uuid
from dataclasses import asdict from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
import os
import pytest import pytest
from mistral_common.protocol.instruct.messages import ImageURLChunk from mistral_common.protocol.instruct.messages import ImageURLChunk
from mistral_common.protocol.instruct.request import ChatCompletionRequest from mistral_common.protocol.instruct.request import ChatCompletionRequest
...@@ -19,11 +20,12 @@ from vllm.sequence import Logprob, SampleLogprobs ...@@ -19,11 +20,12 @@ from vllm.sequence import Logprob, SampleLogprobs
from ....utils import VLLM_PATH from ....utils import VLLM_PATH
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
if TYPE_CHECKING: if TYPE_CHECKING:
from _typeshed import StrPath from _typeshed import StrPath
MODELS = ["mistralai/Pixtral-12B-2409"] MODELS = [os.path.join(models_path_prefix, "mistralai/Pixtral-12B-2409")]
IMG_URLS = [ IMG_URLS = [
"https://picsum.photos/id/237/400/300", "https://picsum.photos/id/237/400/300",
"https://picsum.photos/id/231/200/300", "https://picsum.photos/id/231/200/300",
......
import pathlib import pathlib
from typing import Dict, List, Optional, Tuple, Type, Union from typing import Dict, List, Optional, Tuple, Type, Union
import os
import pytest import pytest
import torch import torch
from PIL.Image import Image from PIL.Image import Image
...@@ -12,12 +13,13 @@ from vllm.multimodal.utils import cached_get_tokenizer, rescale_image_size ...@@ -12,12 +13,13 @@ from vllm.multimodal.utils import cached_get_tokenizer, rescale_image_size
from ....conftest import (IMAGE_ASSETS, HfRunner, ImageAsset, PromptImageInput, from ....conftest import (IMAGE_ASSETS, HfRunner, ImageAsset, PromptImageInput,
VllmRunner, _ImageAssets) VllmRunner, _ImageAssets)
from ...utils import build_model_context, check_logprobs_close from ...utils import build_model_context, check_logprobs_close
from ....utils import models_path_prefix
text_only_models = [ text_only_models = [
"Qwen/Qwen-7B-Chat" # Has no visual component os.path.join(models_path_prefix, "Qwen/Qwen-7B-Chat") # Has no visual component
] ]
multimodal_models = ["Qwen/Qwen-VL"] multimodal_models = [os.path.join(models_path_prefix, "Qwen/Qwen-VL")]
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
......
...@@ -2,12 +2,14 @@ ...@@ -2,12 +2,14 @@
Run `pytest tests/models/test_llama_embedding.py`. Run `pytest tests/models/test_llama_embedding.py`.
""" """
import os
import pytest import pytest
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
from ....utils import models_path_prefix
MODELS = [ MODELS = [
"intfloat/e5-mistral-7b-instruct", os.path.join(models_path_prefix, "intfloat/e5-mistral-7b-instruct"),
] ]
......
...@@ -2,9 +2,11 @@ ...@@ -2,9 +2,11 @@
Run `pytest tests/models/encoder_decoder/language/test_bart.py`. Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
""" """
import os
from typing import List, Optional, Tuple, Type from typing import List, Optional, Tuple, Type
from vllm.utils import is_cpu from vllm.utils import is_cpu
from ....utils import models_path_prefix
if not is_cpu(): if not is_cpu():
# CPU backend is not currently supported with encoder/decoder models # CPU backend is not currently supported with encoder/decoder models
...@@ -21,7 +23,7 @@ if not is_cpu(): ...@@ -21,7 +23,7 @@ if not is_cpu():
from ....utils import multi_gpu_test from ....utils import multi_gpu_test
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
MODELS = ["facebook/bart-base", "facebook/bart-large-cnn"] MODELS = [os.path.join(models_path_prefix, "facebook/bart-base"), os.path.join(models_path_prefix, "facebook/bart-large-cnn")]
def vllm_to_hf_output( def vllm_to_hf_output(
vllm_output: Tuple[List[int], str, Optional[SampleLogprobs]], vllm_output: Tuple[List[int], str, Optional[SampleLogprobs]],
......
from typing import List, Optional, Tuple, Type, overload from typing import List, Optional, Tuple, Type, overload
import os
import pytest import pytest
from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer, from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
BatchEncoding) BatchEncoding)
...@@ -11,6 +12,7 @@ from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner, ...@@ -11,6 +12,7 @@ from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets) _ImageAssets)
from ....utils import multi_gpu_test from ....utils import multi_gpu_test
from ...utils import check_logprobs_close from ...utils import check_logprobs_close
from ....utils import models_path_prefix
_LIMIT_IMAGE_PER_PROMPT = 1 _LIMIT_IMAGE_PER_PROMPT = 1
...@@ -26,7 +28,7 @@ text_only_prompts = [ ...@@ -26,7 +28,7 @@ text_only_prompts = [
] ]
models = [ models = [
"meta-llama/Llama-3.2-11B-Vision-Instruct", os.path.join(models_path_prefix, "meta-llama/Llama-3.2-11B-Vision-Instruct"),
] ]
......
...@@ -5,11 +5,13 @@ import tempfile ...@@ -5,11 +5,13 @@ import tempfile
import uuid import uuid
import pytest import pytest
import os
from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from ..utils import models_path_prefix
MODEL = "google/gemma-1.1-2b-it" MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
ENGINE_ARGS = AsyncEngineArgs(model=MODEL) ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
RAISED_ERROR = KeyError RAISED_ERROR = KeyError
RAISED_VALUE = "foo" RAISED_VALUE = "foo"
......
...@@ -7,6 +7,7 @@ import uuid ...@@ -7,6 +7,7 @@ import uuid
from unittest.mock import Mock from unittest.mock import Mock
import pytest import pytest
import os
from tests.mq_llm_engine.utils import RemoteMQLLMEngine from tests.mq_llm_engine.utils import RemoteMQLLMEngine
from vllm import SamplingParams from vllm import SamplingParams
...@@ -19,8 +20,9 @@ from vllm.entrypoints.openai.cli_args import make_arg_parser ...@@ -19,8 +20,9 @@ from vllm.entrypoints.openai.cli_args import make_arg_parser
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser from vllm.utils import FlexibleArgumentParser
from ..utils import models_path_prefix
MODEL = "google/gemma-1.1-2b-it" MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
ENGINE_ARGS = AsyncEngineArgs(model=MODEL) ENGINE_ARGS = AsyncEngineArgs(model=MODEL)
RAISED_ERROR = KeyError RAISED_ERROR = KeyError
RAISED_VALUE = "foo" RAISED_VALUE = "foo"
......
...@@ -5,11 +5,13 @@ import tempfile ...@@ -5,11 +5,13 @@ import tempfile
import uuid import uuid
import pytest import pytest
import os
from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from ..utils import models_path_prefix
MODEL = "google/gemma-1.1-2b-it" MODEL = os.path.join(models_path_prefix, "google/gemma-1.1-2b-it")
NUM_EXPECTED_TOKENS = 10 NUM_EXPECTED_TOKENS = 10
NUM_REQUESTS = 10000 NUM_REQUESTS = 10000
......
...@@ -2,15 +2,16 @@ ...@@ -2,15 +2,16 @@
from typing import List, Optional from typing import List, Optional
import pytest import pytest
import os
from tests.kernels.utils import override_backend_env_variable from tests.kernels.utils import override_backend_env_variable
from ..models.utils import check_logprobs_close from ..models.utils import check_logprobs_close
from ..utils import (completions_with_server_args, get_client_text_generations, from ..utils import (completions_with_server_args, get_client_text_generations,
get_client_text_logprob_generations) get_client_text_logprob_generations, models_path_prefix)
MODELS = [ MODELS = [
"JackFram/llama-160m", os.path.join(models_path_prefix, "JackFram/llama-160m"),
] ]
NUM_SCHEDULER_STEPS = [8] # Multi-step decoding steps NUM_SCHEDULER_STEPS = [8] # Multi-step decoding steps
NUM_PROMPTS = [10] NUM_PROMPTS = [10]
......
...@@ -3,11 +3,13 @@ ...@@ -3,11 +3,13 @@
from typing import Optional from typing import Optional
import pytest import pytest
import os
from ..models.utils import check_logprobs_close, check_outputs_equal from ..models.utils import check_logprobs_close, check_outputs_equal
from ..utils import models_path_prefix
MODELS = [ MODELS = [
"JackFram/llama-160m", os.path.join(models_path_prefix, "JackFram/llama-160m"),
] ]
NUM_SCHEDULER_STEPS = [8] # Multi-step decoding steps NUM_SCHEDULER_STEPS = [8] # Multi-step decoding steps
NUM_PROMPTS = [10] NUM_PROMPTS = [10]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment