Unverified commit cf069aa8, authored by Harry Mellor and committed by GitHub
Browse files

Update deprecated Python 3.8 typing (#13971)

parent bf33700e
......@@ -3,7 +3,6 @@
# Adapted from
# https://github.com/fmmoret/vllm/blob/fm-support-lora-on-quantized-models/tests/lora/test_llama.py
from dataclasses import dataclass
from typing import List
import pytest
......@@ -19,7 +18,7 @@ class ModelWithQuantization:
quantization: str
MODELS: List[ModelWithQuantization]
MODELS: list[ModelWithQuantization]
#AWQ quantization is currently not supported in ROCm.
if current_platform.is_rocm():
MODELS = [
......@@ -41,7 +40,7 @@ else:
def do_sample(llm: vllm.LLM,
lora_path: str,
lora_id: int,
max_tokens: int = 256) -> List[str]:
max_tokens: int = 256) -> list[str]:
raw_prompts = [
"Give me an orange-ish brown color",
"Give me a neon pink color",
......@@ -61,7 +60,7 @@ def do_sample(llm: vllm.LLM,
lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None)
# Print the outputs.
generated_texts: List[str] = []
generated_texts: list[str] = []
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
......
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
from typing import Dict, List, Optional
from typing import Optional
import pytest
from packaging.version import Version
......@@ -20,7 +20,7 @@ class TestConfig:
max_loras: int = 2
max_lora_rank: int = 16
max_model_len: int = 4096
mm_processor_kwargs: Optional[Dict[str, int]] = None
mm_processor_kwargs: Optional[dict[str, int]] = None
def __post_init__(self):
if self.mm_processor_kwargs is None:
......@@ -57,11 +57,11 @@ class Qwen2VLTester:
)
def run_test(self,
images: List[ImageAsset],
expected_outputs: List[str],
images: list[ImageAsset],
expected_outputs: list[str],
lora_id: Optional[int] = None,
temperature: float = 0,
max_tokens: int = 5) -> List[str]:
max_tokens: int = 5) -> list[str]:
sampling_params = vllm.SamplingParams(
temperature=temperature,
......
# SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest
import vllm
......@@ -21,7 +19,7 @@ EXPECTED_LORA_OUTPUT = [
]
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
prompts = [
PROMPT_TEMPLATE.format(query="How many singers do we have?"),
PROMPT_TEMPLATE.format(
......@@ -40,7 +38,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None)
# Print the outputs.
generated_texts: List[str] = []
generated_texts: list[str] = []
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text.strip()
......
......@@ -3,7 +3,6 @@
import shutil
from os import path
from tempfile import TemporaryDirectory
from typing import List, Tuple
import torch
from huggingface_hub import snapshot_download
......@@ -86,8 +85,8 @@ def test_ultravox_lora(vllm_runner):
dtype="bfloat16",
max_model_len=1024,
) as vllm_model:
ultravox_outputs: List[Tuple[
List[int], str]] = vllm_model.generate_greedy(
ultravox_outputs: list[tuple[
list[int], str]] = vllm_model.generate_greedy(
[
_get_prompt(0, PROMPT, VLLM_PLACEHOLDER,
ULTRAVOX_MODEL_NAME)
......@@ -108,7 +107,7 @@ def test_ultravox_lora(vllm_runner):
dtype="bfloat16",
max_model_len=1024,
) as vllm_model:
llama_outputs: List[Tuple[List[int], str]] = (
llama_outputs: list[tuple[list[int], str]] = (
vllm_model.generate_greedy(
[_get_prompt(0, PROMPT, VLLM_PLACEHOLDER, LLMA_MODEL_NAME)],
256,
......
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Union
from typing import Optional, Union
import torch
......@@ -12,7 +12,7 @@ class DummyLoRAManager:
def __init__(self, device: torch.device = "cuda:0"):
super().__init__()
self._loras: Dict[str, LoRALayerWeights] = {}
self._loras: dict[str, LoRALayerWeights] = {}
self._device = device
def set_module_lora(self, module_name: str, lora: LoRALayerWeights):
......@@ -77,11 +77,11 @@ class DummyLoRAManager:
self,
module_name: str,
input_dim: int,
output_dims: List[int],
noop_lora_index: Optional[List[int]] = None,
output_dims: list[int],
noop_lora_index: Optional[list[int]] = None,
rank: int = 8,
):
base_loras: List[LoRALayerWeights] = []
base_loras: list[LoRALayerWeights] = []
noop_lora_index_set = set(noop_lora_index or [])
for i, out_dim in enumerate(output_dims):
......@@ -110,7 +110,7 @@ def assert_close(a, b):
@dataclass
class PunicaTensors:
inputs_tensor: torch.Tensor
lora_weights: Union[torch.Tensor, List[torch.Tensor]]
lora_weights: Union[torch.Tensor, list[torch.Tensor]]
our_out_tensor: torch.Tensor
ref_out_tensor: torch.Tensor
b_seq_start_loc: torch.Tensor
......@@ -118,7 +118,7 @@ class PunicaTensors:
seq_len_tensor: torch.Tensor
token_lora_mapping: torch.Tensor
def meta(self) -> Tuple[int, int]:
def meta(self) -> tuple[int, int]:
"""
Infer max_seq_length and token_nums from the tensors
and return them.
......
# SPDX-License-Identifier: Apache-2.0
import time
from typing import List
import pytest
import ray
......@@ -133,7 +132,7 @@ def test_metric_counter_generation_tokens_multi_step(
"served_model_name",
[None, [], ["ModelName0"], ["ModelName0", "ModelName1", "ModelName2"]])
def test_metric_set_tag_model_name(vllm_runner, model: str, dtype: str,
served_model_name: List[str]) -> None:
served_model_name: list[str]) -> None:
with vllm_runner(model,
dtype=dtype,
disable_log_stats=False,
......
# SPDX-License-Identifier: Apache-2.0
from typing import Dict, List, Optional
from typing import Optional
from typing_extensions import TypedDict
class ServerConfig(TypedDict, total=False):
model: str
arguments: List[str]
arguments: list[str]
system_prompt: Optional[str]
supports_parallel: Optional[bool]
supports_rocm: Optional[bool]
ARGS: List[str] = ["--max-model-len", "1024"]
ARGS: list[str] = ["--max-model-len", "1024"]
CONFIGS: Dict[str, ServerConfig] = {
CONFIGS: dict[str, ServerConfig] = {
"mistral": {
"model":
"mistralai/Mistral-7B-Instruct-v0.3",
......
# SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest
from vllm.config import CompilationConfig, VllmConfig, set_current_vllm_config
......@@ -51,7 +49,7 @@ class Relu3(ReLUSquaredActivation):
# All but RMSNorm
("all,-rms_norm", 4, [0, 1, 1, 1], True),
])
def test_enabled_ops(env: str, torch_level: int, ops_enabled: List[int],
def test_enabled_ops(env: str, torch_level: int, ops_enabled: list[int],
default_on: bool):
vllm_config = VllmConfig(compilation_config=CompilationConfig(
level=torch_level, custom_ops=env.split(",")))
......
# SPDX-License-Identifier: Apache-2.0
from typing import List, Optional, Tuple, Type
from typing import Optional
import numpy as np
import pytest
......@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close
MODEL_NAME = "fixie-ai/ultravox-v0_4"
AudioTuple = Tuple[np.ndarray, int]
AudioTuple = tuple[np.ndarray, int]
VLLM_PLACEHOLDER = "<|audio|>"
HF_PLACEHOLDER = "<|audio|>"
......@@ -78,7 +78,7 @@ def _get_prompt(audio_count, question, placeholder):
add_generation_prompt=True)
def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def vllm_to_hf_output(vllm_output: tuple[list[int], str,
Optional[SampleLogprobs]],
model: str):
"""Sanitize vllm output to be comparable with hf output."""
......@@ -96,9 +96,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def run_test(
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
prompts_and_audios: List[Tuple[str, str, AudioTuple]],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
prompts_and_audios: list[tuple[str, str, AudioTuple]],
model: str,
*,
dtype: str,
......@@ -158,8 +158,8 @@ def run_test(
def run_multi_audio_test(
vllm_runner: Type[VllmRunner],
prompts_and_audios: List[Tuple[str, List[AudioTuple]]],
vllm_runner: type[VllmRunner],
prompts_and_audios: list[tuple[str, list[AudioTuple]]],
model: str,
*,
dtype: str,
......
......@@ -5,7 +5,7 @@ Note: To pass the test, quantization higher than Q4 should be used
"""
import os
from typing import List, NamedTuple, Type
from typing import NamedTuple
import pytest
from huggingface_hub import hf_hub_download
......@@ -90,8 +90,8 @@ MODELS = [
@pytest.mark.parametrize("tp_size", [1, 2])
def test_models(
num_gpus_available: int,
vllm_runner: Type[VllmRunner],
example_prompts: List[str],
vllm_runner: type[VllmRunner],
example_prompts: list[str],
model: GGUFTestConfig,
dtype: str,
max_tokens: int,
......
......@@ -5,7 +5,6 @@
Note: these tests will only pass on H100
"""
import os
from typing import List
import pytest
from transformers import AutoTokenizer
......@@ -65,7 +64,7 @@ def test_models(example_prompts, model_name) -> None:
for prompt in example_prompts
]
params = SamplingParams(max_tokens=20, temperature=0)
generations: List[str] = []
generations: list[str] = []
# Note: these need to be run 1 at a time due to numerical precision,
# since the expected strs were generated this way.
for prompt in formatted_prompts:
......
# SPDX-License-Identifier: Apache-2.0
from typing import List, Optional, Type
from typing import Optional
import pytest
import torch
......@@ -19,12 +19,12 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
def run_awq_test(
vllm_runner: Type[VllmRunner],
vllm_runner: type[VllmRunner],
image_assets: _ImageAssets,
source_model: str,
quant_model: str,
*,
size_factors: List[float],
size_factors: list[float],
dtype: str,
max_tokens: int,
num_logprobs: int,
......
......@@ -6,7 +6,6 @@ import math
import os
from collections import defaultdict
from pathlib import PosixPath
from typing import Type
import pytest
from packaging.version import Version
......@@ -562,8 +561,8 @@ VLM_TEST_SETTINGS = _mark_splits(VLM_TEST_SETTINGS, num_groups=2)
))
def test_single_image_models(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_single_image_test(
......@@ -585,8 +584,8 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
))
def test_multi_image_models(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_multi_image_test(
......@@ -608,8 +607,8 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
))
def test_image_embedding_models(model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_embedding_test(
......@@ -629,7 +628,7 @@ def test_image_embedding_models(model_type: str,
fork_new_process_for_each_test=False,
))
def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
video_assets: _VideoAssets):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_video_test(
......@@ -651,8 +650,8 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
def test_custom_inputs_models(
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_custom_inputs_test(
......@@ -674,8 +673,8 @@ def test_custom_inputs_models(
@fork_new_process_for_each_test
def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_single_image_test(
......@@ -698,8 +697,8 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
@fork_new_process_for_each_test
def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_multi_image_test(
......@@ -722,8 +721,8 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
@fork_new_process_for_each_test
def test_image_embedding_models_heavy(model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_embedding_test(
......@@ -743,8 +742,8 @@ def test_image_embedding_models_heavy(model_type: str,
fork_new_process_for_each_test=True,
))
def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
video_assets: _VideoAssets):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_video_test(
......@@ -767,8 +766,8 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
def test_custom_inputs_models_heavy(
model_type: str,
test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
):
model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_custom_inputs_test(
......
......@@ -2,7 +2,7 @@
import os
import re
from typing import List, Optional, Tuple, Type
from typing import Optional
import pytest
from transformers import AutoTokenizer
......@@ -25,7 +25,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these
models = ["microsoft/Phi-3.5-vision-instruct"]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def vllm_to_hf_output(vllm_output: tuple[list[int], str,
Optional[SampleLogprobs]],
model: str):
"""Sanitize vllm output to be comparable with hf output."""
......@@ -55,9 +55,9 @@ if current_platform.is_rocm():
def run_test(
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
inputs: List[Tuple[List[str], PromptImageInput]],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
inputs: list[tuple[list[str], PromptImageInput]],
model: str,
*,
dtype: str,
......
......@@ -6,7 +6,7 @@ Run `pytest tests/models/test_mistral.py`.
import json
import uuid
from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Optional
import pytest
from mistral_common.multimodal import download_image
......@@ -38,7 +38,7 @@ IMG_URLS = [
PROMPT = "Describe each image in one short sentence."
def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]:
def _create_msg_format(urls: list[str]) -> list[dict[str, Any]]:
return [{
"role":
"user",
......@@ -54,7 +54,7 @@ def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]:
}]
def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]:
def _create_msg_format_hf(urls: list[str]) -> list[dict[str, Any]]:
return [{
"role":
"user",
......@@ -68,7 +68,7 @@ def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]:
}]
def _create_engine_inputs(urls: List[str]) -> TokensPrompt:
def _create_engine_inputs(urls: list[str]) -> TokensPrompt:
msg = _create_msg_format(urls)
tokenizer = MistralTokenizer.from_model("pixtral")
......@@ -89,7 +89,7 @@ def _create_engine_inputs(urls: List[str]) -> TokensPrompt:
return engine_inputs
def _create_engine_inputs_hf(urls: List[str]) -> TextPrompt:
def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
msg = _create_msg_format_hf(urls)
tokenizer = AutoProcessor.from_pretrained("mistral-community/pixtral-12b")
......@@ -128,7 +128,7 @@ assert FIXTURES_PATH.exists()
FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json"
FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json"
OutputsLogprobs = List[Tuple[List[int], str, Optional[SampleLogprobs]]]
OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
# For the test author to store golden output in JSON
......
# SPDX-License-Identifier: Apache-2.0
from typing import Any, List, Optional, Tuple, Type, TypedDict, Union
from typing import Any, Optional, TypedDict, Union
import numpy.typing as npt
import pytest
......@@ -69,21 +69,21 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict):
def batch_make_image_embeddings(
image_batches: List[Union[Image.Image, List[Image.Image]]], processor,
llm: VllmRunner) -> List[Qwen2VLPromptImageEmbeddingInput]:
image_batches: list[Union[Image.Image, list[Image.Image]]], processor,
llm: VllmRunner) -> list[Qwen2VLPromptImageEmbeddingInput]:
"""batched image embeddings for Qwen2-VL
This will infer all images' embeddings in a single batch,
and split the result according to input batches.
image_batches:
- Single-image batches: `List[Image.Image]`
- Multiple-image batches: `List[List[Image.Image]]]`
- Single-image batches: `list[Image.Image]`
- Multiple-image batches: `list[list[Image.Image]]`
returns: `List[Qwen2VLPromptImageEmbeddingInput]`
returns: `list[Qwen2VLPromptImageEmbeddingInput]`
"""
image_batches_: List[Any] = image_batches[:]
image_batches_: list[Any] = image_batches[:]
# convert single-image batches to multiple-image batches
for idx in range(len(image_batches_)):
......@@ -93,7 +93,7 @@ def batch_make_image_embeddings(
assert isinstance(image_batches_[idx], list)
# append all images into a list (as a batch)
images: List[Image.Image] = []
images: list[Image.Image] = []
for image_batch in image_batches_:
images += image_batch
......@@ -121,7 +121,7 @@ def batch_make_image_embeddings(
image_embeds = torch.concat(llm.apply_model(get_image_embeds))
# split into original batches
result: List[Qwen2VLPromptImageEmbeddingInput] = []
result: list[Qwen2VLPromptImageEmbeddingInput] = []
image_counter = 0
embed_counter = 0
for image_batch in image_batches_:
......@@ -153,7 +153,7 @@ def batch_make_image_embeddings(
def batch_make_video_embeddings(
video_batches: PromptVideoInput, processor,
llm: VllmRunner) -> List[Qwen2VLPromptVideoEmbeddingInput]:
llm: VllmRunner) -> list[Qwen2VLPromptVideoEmbeddingInput]:
"""batched video embeddings for Qwen2-VL
A NDArray represents a single video's all frames.
......@@ -162,21 +162,21 @@ def batch_make_video_embeddings(
and split the result according to input batches.
video_batches:
- Single-video batches: `List[NDArray]`
- Multiple-video batches: `List[List[NDArray]]`
- Single-video batches: `list[NDArray]`
- Multiple-video batches: `list[list[NDArray]]`
"""
video_batches_: List[Any] = video_batches[:]
video_batches_: list[Any] = video_batches[:]
for idx in range(len(video_batches_)):
if not isinstance(video_batches_[idx], list):
single_video_batch: List[npt.NDArray] = [video_batches_[idx]]
single_video_batch: list[npt.NDArray] = [video_batches_[idx]]
video_batches_[idx] = single_video_batch
assert isinstance(video_batches_[idx], list)
# append all videos into a list (as a batch)
videos: List[npt.NDArray] = []
videos: list[npt.NDArray] = []
for video_batch in video_batches_:
videos += video_batch
......@@ -204,7 +204,7 @@ def batch_make_video_embeddings(
video_embeds = torch.concat(llm.apply_model(get_image_embeds))
# split into original batches
result: List[Qwen2VLPromptVideoEmbeddingInput] = []
result: list[Qwen2VLPromptVideoEmbeddingInput] = []
video_counter = 0
embed_counter = 0
for video_batch in video_batches_:
......@@ -235,8 +235,8 @@ def batch_make_video_embeddings(
def run_embedding_input_test(
vllm_runner: Type[VllmRunner],
inputs: List[Tuple[List[str], PromptImageInput, PromptVideoInput]],
vllm_runner: type[VllmRunner],
inputs: list[tuple[list[str], PromptImageInput, PromptVideoInput]],
model: str,
*,
dtype: str,
......@@ -323,8 +323,8 @@ def test_qwen2_vl_image_embeddings_input(vllm_runner, image_assets, model,
num_logprobs: int) -> None:
images = [asset.pil_image for asset in image_assets]
inputs_per_case: List[Tuple[
List[str], PromptImageInput, PromptVideoInput]] = [(
inputs_per_case: list[tuple[
list[str], PromptImageInput, PromptVideoInput]] = [(
[prompt for _ in size_factors],
[rescale_image_size(image, factor) for factor in size_factors],
[],
......@@ -365,7 +365,7 @@ def test_qwen2_vl_multiple_image_embeddings_input(vllm_runner, image_assets,
num_logprobs: int) -> None:
images = [asset.pil_image for asset in image_assets]
inputs_per_case: List[Tuple[List[str], PromptImageInput,
inputs_per_case: list[tuple[list[str], PromptImageInput,
PromptVideoInput]] = [(
[MULTIIMAGE_PROMPT for _ in size_factors],
[[
......@@ -413,8 +413,8 @@ def test_qwen2_vl_video_embeddings_input(vllm_runner, video_assets, model,
for asset in video_assets
]
inputs_per_case: List[Tuple[
List[str], PromptImageInput, PromptVideoInput]] = [(
inputs_per_case: list[tuple[
list[str], PromptImageInput, PromptVideoInput]] = [(
[prompt for _ in size_factors],
[],
[rescale_video_size(video, factor) for factor in size_factors],
......
# SPDX-License-Identifier: Apache-2.0
"""Helpers for building inputs that can be leveraged for different test types.
"""
from collections.abc import Iterable
from pathlib import PosixPath
from typing import Callable, Iterable, List, Optional, Tuple, Union
from typing import Callable, Optional, Union
import torch
......@@ -33,7 +34,7 @@ def replace_test_placeholder(prompt: str, img_idx_to_prompt: Callable[[int],
def get_model_prompts(base_prompts: Iterable[str],
img_idx_to_prompt: Optional[Callable[[int], str]],
video_idx_to_prompt: Optional[Callable[[int], str]],
prompt_formatter: Callable[[str], str]) -> List[str]:
prompt_formatter: Callable[[str], str]) -> list[str]:
"""Given a model-agnostic base prompt and test configuration for a model(s)
to be tested, update the media placeholders and apply the prompt formatting
to get the test prompt string for this model.
......@@ -218,7 +219,7 @@ def build_video_inputs_from_test_info(
) for video, prompt in zip(sampled_vids, model_prompts)]
def apply_image_size_scaling(image, size: Union[float, Tuple[int, int]],
def apply_image_size_scaling(image, size: Union[float, tuple[int, int]],
size_type: SizeType):
"""Applies a size scaler to one image; this can be an image size factor,
which scales the image while maintaining the aspect ratio"""
......
......@@ -5,7 +5,7 @@ handling multimodal placeholder substitution, and so on.
"""
import itertools
from collections import OrderedDict
from typing import Dict, Iterable, Tuple
from collections.abc import Iterable
import pytest
......@@ -13,9 +13,9 @@ from .types import (EMBEDDING_SIZE_FACTORS, ExpandableVLMTestArgs,
ImageSizeWrapper, SizeType, VLMTestInfo, VLMTestType)
def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo],
def get_filtered_test_settings(test_settings: dict[str, VLMTestInfo],
test_type: VLMTestType,
fork_per_test: bool) -> Dict[str, VLMTestInfo]:
fork_per_test: bool) -> dict[str, VLMTestInfo]:
"""Given the dict of potential test settings to run, return a subdict
of tests who have the current test type enabled with the matching val for
fork_per_test.
......@@ -49,7 +49,7 @@ def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo],
return matching_tests
def get_parametrized_options(test_settings: Dict[str, VLMTestInfo],
def get_parametrized_options(test_settings: dict[str, VLMTestInfo],
test_type: VLMTestType,
fork_new_process_for_each_test: bool):
"""Converts all of our VLMTestInfo into an expanded list of parameters.
......@@ -121,7 +121,7 @@ def get_parametrized_options(test_settings: Dict[str, VLMTestInfo],
def get_wrapped_test_sizes(
test_info: VLMTestInfo,
test_type: VLMTestType) -> Tuple[ImageSizeWrapper, ...]:
test_type: VLMTestType) -> tuple[ImageSizeWrapper, ...]:
"""Given a test info which may have size factors or fixed sizes, wrap them
and combine them into an iterable, each of which will be used in parameter
expansion.
......
# SPDX-License-Identifier: Apache-2.0
"""Core test implementation to be shared across modalities."""
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
from typing import Any, Callable, Optional, Union
import torch
from PIL.Image import Image
......@@ -17,9 +17,9 @@ from .types import RunnerOutput
def run_test(
*,
hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner],
inputs: List[Tuple[List[str], List[Union[List[Image], Image]]]],
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
inputs: list[tuple[list[str], list[Union[list[Image], Image]]]],
model: str,
dtype: str,
max_tokens: int,
......@@ -29,15 +29,15 @@ def run_test(
max_num_seqs: int,
hf_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
auto_cls: Type[_BaseAutoModelClass],
auto_cls: type[_BaseAutoModelClass],
use_tokenizer_eos: bool,
postprocess_inputs: Callable[[BatchEncoding], BatchEncoding],
comparator: Callable[..., None],
get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]],
stop_str: Optional[List[str]],
limit_mm_per_prompt: Dict[str, int],
vllm_runner_kwargs: Optional[Dict[str, Any]],
hf_model_kwargs: Optional[Dict[str, Any]],
stop_str: Optional[list[str]],
limit_mm_per_prompt: dict[str, int],
vllm_runner_kwargs: Optional[dict[str, Any]],
hf_model_kwargs: Optional[dict[str, Any]],
patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
task: TaskOption = "auto",
runner_mm_key: str = "images",
......@@ -61,7 +61,7 @@ def run_test(
# if we run HF first, the cuda initialization will be done and it
# will hurt multiprocessing backend with fork method (the default method).
vllm_runner_kwargs_: Dict[str, Any] = {}
vllm_runner_kwargs_: dict[str, Any] = {}
if model_info.tokenizer:
vllm_runner_kwargs_["tokenizer"] = model_info.tokenizer
if model_info.tokenizer_mode:
......@@ -84,7 +84,7 @@ def run_test(
**vllm_runner_kwargs_) as vllm_model:
tokenizer = vllm_model.model.get_tokenizer()
vllm_kwargs: Dict[str, Any] = {}
vllm_kwargs: dict[str, Any] = {}
if get_stop_token_ids is not None:
vllm_kwargs["stop_token_ids"] = get_stop_token_ids(tokenizer)
if stop_str:
......
......@@ -6,7 +6,7 @@ typically specific to a small subset of models.
import re
import types
from pathlib import PosixPath
from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, Optional, Union
import torch
from PIL.Image import Image
......@@ -49,7 +49,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput,
def qwen_vllm_to_hf_output(
vllm_output: RunnerOutput,
model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]:
model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
"""Sanitize vllm output [qwen models] to be comparable with hf output."""
output_ids, output_str, out_logprobs = vllm_output
......@@ -60,7 +60,7 @@ def qwen_vllm_to_hf_output(
def qwen2_vllm_to_hf_output(
vllm_output: RunnerOutput,
model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]:
model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
"""Sanitize vllm output [qwen2 models] to be comparable with hf output."""
output_ids, output_str, out_logprobs = vllm_output
......@@ -78,7 +78,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,
def llava_video_vllm_to_hf_output(
vllm_output: RunnerOutput,
model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]:
model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
config = AutoConfig.from_pretrained(model)
mm_token_id = config.video_token_index
return _llava_vllm_to_hf_output(vllm_output, model, mm_token_id)
......@@ -247,7 +247,7 @@ def molmo_post_processor(hf_inputs: BatchEncoding, dtype: str):
####### Prompt path encoders for models that need models on disk
def qwen_prompt_path_encoder(
tmp_path: PosixPath, prompt: str, assets: Union[List[ImageAsset],
tmp_path: PosixPath, prompt: str, assets: Union[list[ImageAsset],
_ImageAssets]) -> str:
"""Given a temporary dir path, export one or more image assets into the
tempdir & replace its contents with the local path to the string so that
......@@ -257,7 +257,7 @@ def qwen_prompt_path_encoder(
Args:
tmp_path: Tempdir for test under consideration.
prompt: Prompt with image placeholders.
assets: List of image assets whose len equals the num placeholders.
assets: list of image assets whose len equals the num placeholders.
"""
# Ensure that the number of placeholders matches the number of assets;
# If this is not true, the test is probably written incorrectly.
......@@ -350,7 +350,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self.max_num = self.config.max_dynamic_patch
self.image_size = self.vision_config.image_size
def __call__(self, text: str, images: Union[Image, List[Image]],
def __call__(self, text: str, images: Union[Image, list[Image]],
**kwargs):
# yapf: disable
from vllm.model_executor.models.h2ovl import (
......@@ -410,7 +410,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self.max_num = self.config.max_dynamic_patch
self.image_size = self.vision_config.image_size
def __call__(self, text: str, images: Union[Image, List[Image]],
def __call__(self, text: str, images: Union[Image, list[Image]],
**kwargs):
from vllm.model_executor.models.internvl import (
IMG_CONTEXT, IMG_END, IMG_START,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment