Unverified Commit cf069aa8 authored by Harry Mellor's avatar Harry Mellor Committed by GitHub
Browse files

Update deprecated Python 3.8 typing (#13971)

parent bf33700e
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
# Adapted from # Adapted from
# https://github.com/fmmoret/vllm/blob/fm-support-lora-on-quantized-models/tests/lora/test_llama.py # https://github.com/fmmoret/vllm/blob/fm-support-lora-on-quantized-models/tests/lora/test_llama.py
from dataclasses import dataclass from dataclasses import dataclass
from typing import List
import pytest import pytest
...@@ -19,7 +18,7 @@ class ModelWithQuantization: ...@@ -19,7 +18,7 @@ class ModelWithQuantization:
quantization: str quantization: str
MODELS: List[ModelWithQuantization] MODELS: list[ModelWithQuantization]
#AWQ quantization is currently not supported in ROCm. #AWQ quantization is currently not supported in ROCm.
if current_platform.is_rocm(): if current_platform.is_rocm():
MODELS = [ MODELS = [
...@@ -41,7 +40,7 @@ else: ...@@ -41,7 +40,7 @@ else:
def do_sample(llm: vllm.LLM, def do_sample(llm: vllm.LLM,
lora_path: str, lora_path: str,
lora_id: int, lora_id: int,
max_tokens: int = 256) -> List[str]: max_tokens: int = 256) -> list[str]:
raw_prompts = [ raw_prompts = [
"Give me an orange-ish brown color", "Give me an orange-ish brown color",
"Give me a neon pink color", "Give me a neon pink color",
...@@ -61,7 +60,7 @@ def do_sample(llm: vllm.LLM, ...@@ -61,7 +60,7 @@ def do_sample(llm: vllm.LLM,
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts: List[str] = [] generated_texts: list[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text generated_text = output.outputs[0].text
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional from typing import Optional
import pytest import pytest
from packaging.version import Version from packaging.version import Version
...@@ -20,7 +20,7 @@ class TestConfig: ...@@ -20,7 +20,7 @@ class TestConfig:
max_loras: int = 2 max_loras: int = 2
max_lora_rank: int = 16 max_lora_rank: int = 16
max_model_len: int = 4096 max_model_len: int = 4096
mm_processor_kwargs: Optional[Dict[str, int]] = None mm_processor_kwargs: Optional[dict[str, int]] = None
def __post_init__(self): def __post_init__(self):
if self.mm_processor_kwargs is None: if self.mm_processor_kwargs is None:
...@@ -57,11 +57,11 @@ class Qwen2VLTester: ...@@ -57,11 +57,11 @@ class Qwen2VLTester:
) )
def run_test(self, def run_test(self,
images: List[ImageAsset], images: list[ImageAsset],
expected_outputs: List[str], expected_outputs: list[str],
lora_id: Optional[int] = None, lora_id: Optional[int] = None,
temperature: float = 0, temperature: float = 0,
max_tokens: int = 5) -> List[str]: max_tokens: int = 5) -> list[str]:
sampling_params = vllm.SamplingParams( sampling_params = vllm.SamplingParams(
temperature=temperature, temperature=temperature,
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest import pytest
import vllm import vllm
...@@ -21,7 +19,7 @@ EXPECTED_LORA_OUTPUT = [ ...@@ -21,7 +19,7 @@ EXPECTED_LORA_OUTPUT = [
] ]
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
prompts = [ prompts = [
PROMPT_TEMPLATE.format(query="How many singers do we have?"), PROMPT_TEMPLATE.format(query="How many singers do we have?"),
PROMPT_TEMPLATE.format( PROMPT_TEMPLATE.format(
...@@ -40,7 +38,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]: ...@@ -40,7 +38,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
lora_request=LoRARequest(str(lora_id), lora_id, lora_path) lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None) if lora_id else None)
# Print the outputs. # Print the outputs.
generated_texts: List[str] = [] generated_texts: list[str] = []
for output in outputs: for output in outputs:
prompt = output.prompt prompt = output.prompt
generated_text = output.outputs[0].text.strip() generated_text = output.outputs[0].text.strip()
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
import shutil import shutil
from os import path from os import path
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
from typing import List, Tuple
import torch import torch
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
...@@ -86,8 +85,8 @@ def test_ultravox_lora(vllm_runner): ...@@ -86,8 +85,8 @@ def test_ultravox_lora(vllm_runner):
dtype="bfloat16", dtype="bfloat16",
max_model_len=1024, max_model_len=1024,
) as vllm_model: ) as vllm_model:
ultravox_outputs: List[Tuple[ ultravox_outputs: list[tuple[
List[int], str]] = vllm_model.generate_greedy( list[int], str]] = vllm_model.generate_greedy(
[ [
_get_prompt(0, PROMPT, VLLM_PLACEHOLDER, _get_prompt(0, PROMPT, VLLM_PLACEHOLDER,
ULTRAVOX_MODEL_NAME) ULTRAVOX_MODEL_NAME)
...@@ -108,7 +107,7 @@ def test_ultravox_lora(vllm_runner): ...@@ -108,7 +107,7 @@ def test_ultravox_lora(vllm_runner):
dtype="bfloat16", dtype="bfloat16",
max_model_len=1024, max_model_len=1024,
) as vllm_model: ) as vllm_model:
llama_outputs: List[Tuple[List[int], str]] = ( llama_outputs: list[tuple[list[int], str]] = (
vllm_model.generate_greedy( vllm_model.generate_greedy(
[_get_prompt(0, PROMPT, VLLM_PLACEHOLDER, LLMA_MODEL_NAME)], [_get_prompt(0, PROMPT, VLLM_PLACEHOLDER, LLMA_MODEL_NAME)],
256, 256,
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Union from typing import Optional, Union
import torch import torch
...@@ -12,7 +12,7 @@ class DummyLoRAManager: ...@@ -12,7 +12,7 @@ class DummyLoRAManager:
def __init__(self, device: torch.device = "cuda:0"): def __init__(self, device: torch.device = "cuda:0"):
super().__init__() super().__init__()
self._loras: Dict[str, LoRALayerWeights] = {} self._loras: dict[str, LoRALayerWeights] = {}
self._device = device self._device = device
def set_module_lora(self, module_name: str, lora: LoRALayerWeights): def set_module_lora(self, module_name: str, lora: LoRALayerWeights):
...@@ -77,11 +77,11 @@ class DummyLoRAManager: ...@@ -77,11 +77,11 @@ class DummyLoRAManager:
self, self,
module_name: str, module_name: str,
input_dim: int, input_dim: int,
output_dims: List[int], output_dims: list[int],
noop_lora_index: Optional[List[int]] = None, noop_lora_index: Optional[list[int]] = None,
rank: int = 8, rank: int = 8,
): ):
base_loras: List[LoRALayerWeights] = [] base_loras: list[LoRALayerWeights] = []
noop_lora_index_set = set(noop_lora_index or []) noop_lora_index_set = set(noop_lora_index or [])
for i, out_dim in enumerate(output_dims): for i, out_dim in enumerate(output_dims):
...@@ -110,7 +110,7 @@ def assert_close(a, b): ...@@ -110,7 +110,7 @@ def assert_close(a, b):
@dataclass @dataclass
class PunicaTensors: class PunicaTensors:
inputs_tensor: torch.Tensor inputs_tensor: torch.Tensor
lora_weights: Union[torch.Tensor, List[torch.Tensor]] lora_weights: Union[torch.Tensor, list[torch.Tensor]]
our_out_tensor: torch.Tensor our_out_tensor: torch.Tensor
ref_out_tensor: torch.Tensor ref_out_tensor: torch.Tensor
b_seq_start_loc: torch.Tensor b_seq_start_loc: torch.Tensor
...@@ -118,7 +118,7 @@ class PunicaTensors: ...@@ -118,7 +118,7 @@ class PunicaTensors:
seq_len_tensor: torch.Tensor seq_len_tensor: torch.Tensor
token_lora_mapping: torch.Tensor token_lora_mapping: torch.Tensor
def meta(self) -> Tuple[int, int]: def meta(self) -> tuple[int, int]:
""" """
Infer max_seq_length and token_nums from the tensors Infer max_seq_length and token_nums from the tensors
and return them. and return them.
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import time import time
from typing import List
import pytest import pytest
import ray import ray
...@@ -133,7 +132,7 @@ def test_metric_counter_generation_tokens_multi_step( ...@@ -133,7 +132,7 @@ def test_metric_counter_generation_tokens_multi_step(
"served_model_name", "served_model_name",
[None, [], ["ModelName0"], ["ModelName0", "ModelName1", "ModelName2"]]) [None, [], ["ModelName0"], ["ModelName0", "ModelName1", "ModelName2"]])
def test_metric_set_tag_model_name(vllm_runner, model: str, dtype: str, def test_metric_set_tag_model_name(vllm_runner, model: str, dtype: str,
served_model_name: List[str]) -> None: served_model_name: list[str]) -> None:
with vllm_runner(model, with vllm_runner(model,
dtype=dtype, dtype=dtype,
disable_log_stats=False, disable_log_stats=False,
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import Dict, List, Optional from typing import Optional
from typing_extensions import TypedDict from typing_extensions import TypedDict
class ServerConfig(TypedDict, total=False): class ServerConfig(TypedDict, total=False):
model: str model: str
arguments: List[str] arguments: list[str]
system_prompt: Optional[str] system_prompt: Optional[str]
supports_parallel: Optional[bool] supports_parallel: Optional[bool]
supports_rocm: Optional[bool] supports_rocm: Optional[bool]
ARGS: List[str] = ["--max-model-len", "1024"] ARGS: list[str] = ["--max-model-len", "1024"]
CONFIGS: Dict[str, ServerConfig] = { CONFIGS: dict[str, ServerConfig] = {
"mistral": { "mistral": {
"model": "model":
"mistralai/Mistral-7B-Instruct-v0.3", "mistralai/Mistral-7B-Instruct-v0.3",
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest import pytest
from vllm.config import CompilationConfig, VllmConfig, set_current_vllm_config from vllm.config import CompilationConfig, VllmConfig, set_current_vllm_config
...@@ -51,7 +49,7 @@ class Relu3(ReLUSquaredActivation): ...@@ -51,7 +49,7 @@ class Relu3(ReLUSquaredActivation):
# All but RMSNorm # All but RMSNorm
("all,-rms_norm", 4, [0, 1, 1, 1], True), ("all,-rms_norm", 4, [0, 1, 1, 1], True),
]) ])
def test_enabled_ops(env: str, torch_level: int, ops_enabled: List[int], def test_enabled_ops(env: str, torch_level: int, ops_enabled: list[int],
default_on: bool): default_on: bool):
vllm_config = VllmConfig(compilation_config=CompilationConfig( vllm_config = VllmConfig(compilation_config=CompilationConfig(
level=torch_level, custom_ops=env.split(","))) level=torch_level, custom_ops=env.split(",")))
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import List, Optional, Tuple, Type from typing import Optional
import numpy as np import numpy as np
import pytest import pytest
...@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close ...@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close
MODEL_NAME = "fixie-ai/ultravox-v0_4" MODEL_NAME = "fixie-ai/ultravox-v0_4"
AudioTuple = Tuple[np.ndarray, int] AudioTuple = tuple[np.ndarray, int]
VLLM_PLACEHOLDER = "<|audio|>" VLLM_PLACEHOLDER = "<|audio|>"
HF_PLACEHOLDER = "<|audio|>" HF_PLACEHOLDER = "<|audio|>"
...@@ -78,7 +78,7 @@ def _get_prompt(audio_count, question, placeholder): ...@@ -78,7 +78,7 @@ def _get_prompt(audio_count, question, placeholder):
add_generation_prompt=True) add_generation_prompt=True)
def vllm_to_hf_output(vllm_output: Tuple[List[int], str, def vllm_to_hf_output(vllm_output: tuple[list[int], str,
Optional[SampleLogprobs]], Optional[SampleLogprobs]],
model: str): model: str):
"""Sanitize vllm output to be comparable with hf output.""" """Sanitize vllm output to be comparable with hf output."""
...@@ -96,9 +96,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str, ...@@ -96,9 +96,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def run_test( def run_test(
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
prompts_and_audios: List[Tuple[str, str, AudioTuple]], prompts_and_audios: list[tuple[str, str, AudioTuple]],
model: str, model: str,
*, *,
dtype: str, dtype: str,
...@@ -158,8 +158,8 @@ def run_test( ...@@ -158,8 +158,8 @@ def run_test(
def run_multi_audio_test( def run_multi_audio_test(
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
prompts_and_audios: List[Tuple[str, List[AudioTuple]]], prompts_and_audios: list[tuple[str, list[AudioTuple]]],
model: str, model: str,
*, *,
dtype: str, dtype: str,
......
...@@ -5,7 +5,7 @@ Note: To pass the test, quantization higher than Q4 should be used ...@@ -5,7 +5,7 @@ Note: To pass the test, quantization higher than Q4 should be used
""" """
import os import os
from typing import List, NamedTuple, Type from typing import NamedTuple
import pytest import pytest
from huggingface_hub import hf_hub_download from huggingface_hub import hf_hub_download
...@@ -90,8 +90,8 @@ MODELS = [ ...@@ -90,8 +90,8 @@ MODELS = [
@pytest.mark.parametrize("tp_size", [1, 2]) @pytest.mark.parametrize("tp_size", [1, 2])
def test_models( def test_models(
num_gpus_available: int, num_gpus_available: int,
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
example_prompts: List[str], example_prompts: list[str],
model: GGUFTestConfig, model: GGUFTestConfig,
dtype: str, dtype: str,
max_tokens: int, max_tokens: int,
......
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
Note: these tests will only pass on H100 Note: these tests will only pass on H100
""" """
import os import os
from typing import List
import pytest import pytest
from transformers import AutoTokenizer from transformers import AutoTokenizer
...@@ -65,7 +64,7 @@ def test_models(example_prompts, model_name) -> None: ...@@ -65,7 +64,7 @@ def test_models(example_prompts, model_name) -> None:
for prompt in example_prompts for prompt in example_prompts
] ]
params = SamplingParams(max_tokens=20, temperature=0) params = SamplingParams(max_tokens=20, temperature=0)
generations: List[str] = [] generations: list[str] = []
# Note: these need to be run 1 at a time due to numerical precision, # Note: these need to be run 1 at a time due to numerical precision,
# since the expected strs were generated this way. # since the expected strs were generated this way.
for prompt in formatted_prompts: for prompt in formatted_prompts:
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import List, Optional, Type from typing import Optional
import pytest import pytest
import torch import torch
...@@ -19,12 +19,12 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ ...@@ -19,12 +19,12 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
def run_awq_test( def run_awq_test(
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
image_assets: _ImageAssets, image_assets: _ImageAssets,
source_model: str, source_model: str,
quant_model: str, quant_model: str,
*, *,
size_factors: List[float], size_factors: list[float],
dtype: str, dtype: str,
max_tokens: int, max_tokens: int,
num_logprobs: int, num_logprobs: int,
......
...@@ -6,7 +6,6 @@ import math ...@@ -6,7 +6,6 @@ import math
import os import os
from collections import defaultdict from collections import defaultdict
from pathlib import PosixPath from pathlib import PosixPath
from typing import Type
import pytest import pytest
from packaging.version import Version from packaging.version import Version
...@@ -562,8 +561,8 @@ VLM_TEST_SETTINGS = _mark_splits(VLM_TEST_SETTINGS, num_groups=2) ...@@ -562,8 +561,8 @@ VLM_TEST_SETTINGS = _mark_splits(VLM_TEST_SETTINGS, num_groups=2)
)) ))
def test_single_image_models(tmp_path: PosixPath, model_type: str, def test_single_image_models(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
image_assets: _ImageAssets): image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_single_image_test( runners.run_single_image_test(
...@@ -585,8 +584,8 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str, ...@@ -585,8 +584,8 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
)) ))
def test_multi_image_models(tmp_path: PosixPath, model_type: str, def test_multi_image_models(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
image_assets: _ImageAssets): image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_multi_image_test( runners.run_multi_image_test(
...@@ -608,8 +607,8 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str, ...@@ -608,8 +607,8 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
)) ))
def test_image_embedding_models(model_type: str, def test_image_embedding_models(model_type: str,
test_case: ExpandableVLMTestArgs, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
image_assets: _ImageAssets): image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_embedding_test( runners.run_embedding_test(
...@@ -629,7 +628,7 @@ def test_image_embedding_models(model_type: str, ...@@ -629,7 +628,7 @@ def test_image_embedding_models(model_type: str,
fork_new_process_for_each_test=False, fork_new_process_for_each_test=False,
)) ))
def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs, def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], vllm_runner: Type[VllmRunner], hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
video_assets: _VideoAssets): video_assets: _VideoAssets):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_video_test( runners.run_video_test(
...@@ -651,8 +650,8 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs, ...@@ -651,8 +650,8 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
def test_custom_inputs_models( def test_custom_inputs_models(
model_type: str, model_type: str,
test_case: ExpandableVLMTestArgs, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
): ):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_custom_inputs_test( runners.run_custom_inputs_test(
...@@ -674,8 +673,8 @@ def test_custom_inputs_models( ...@@ -674,8 +673,8 @@ def test_custom_inputs_models(
@fork_new_process_for_each_test @fork_new_process_for_each_test
def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str, def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
image_assets: _ImageAssets): image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_single_image_test( runners.run_single_image_test(
...@@ -698,8 +697,8 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str, ...@@ -698,8 +697,8 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
@fork_new_process_for_each_test @fork_new_process_for_each_test
def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str, def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
test_case: ExpandableVLMTestArgs, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
image_assets: _ImageAssets): image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_multi_image_test( runners.run_multi_image_test(
...@@ -722,8 +721,8 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str, ...@@ -722,8 +721,8 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
@fork_new_process_for_each_test @fork_new_process_for_each_test
def test_image_embedding_models_heavy(model_type: str, def test_image_embedding_models_heavy(model_type: str,
test_case: ExpandableVLMTestArgs, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
image_assets: _ImageAssets): image_assets: _ImageAssets):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_embedding_test( runners.run_embedding_test(
...@@ -743,8 +742,8 @@ def test_image_embedding_models_heavy(model_type: str, ...@@ -743,8 +742,8 @@ def test_image_embedding_models_heavy(model_type: str,
fork_new_process_for_each_test=True, fork_new_process_for_each_test=True,
)) ))
def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs, def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
video_assets: _VideoAssets): video_assets: _VideoAssets):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_video_test( runners.run_video_test(
...@@ -767,8 +766,8 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs, ...@@ -767,8 +766,8 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
def test_custom_inputs_models_heavy( def test_custom_inputs_models_heavy(
model_type: str, model_type: str,
test_case: ExpandableVLMTestArgs, test_case: ExpandableVLMTestArgs,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
): ):
model_test_info = VLM_TEST_SETTINGS[model_type] model_test_info = VLM_TEST_SETTINGS[model_type]
runners.run_custom_inputs_test( runners.run_custom_inputs_test(
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import os import os
import re import re
from typing import List, Optional, Tuple, Type from typing import Optional
import pytest import pytest
from transformers import AutoTokenizer from transformers import AutoTokenizer
...@@ -25,7 +25,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these ...@@ -25,7 +25,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these
models = ["microsoft/Phi-3.5-vision-instruct"] models = ["microsoft/Phi-3.5-vision-instruct"]
def vllm_to_hf_output(vllm_output: Tuple[List[int], str, def vllm_to_hf_output(vllm_output: tuple[list[int], str,
Optional[SampleLogprobs]], Optional[SampleLogprobs]],
model: str): model: str):
"""Sanitize vllm output to be comparable with hf output.""" """Sanitize vllm output to be comparable with hf output."""
...@@ -55,9 +55,9 @@ if current_platform.is_rocm(): ...@@ -55,9 +55,9 @@ if current_platform.is_rocm():
def run_test( def run_test(
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
inputs: List[Tuple[List[str], PromptImageInput]], inputs: list[tuple[list[str], PromptImageInput]],
model: str, model: str,
*, *,
dtype: str, dtype: str,
......
...@@ -6,7 +6,7 @@ Run `pytest tests/models/test_mistral.py`. ...@@ -6,7 +6,7 @@ Run `pytest tests/models/test_mistral.py`.
import json import json
import uuid import uuid
from dataclasses import asdict from dataclasses import asdict
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple from typing import TYPE_CHECKING, Any, Optional
import pytest import pytest
from mistral_common.multimodal import download_image from mistral_common.multimodal import download_image
...@@ -38,7 +38,7 @@ IMG_URLS = [ ...@@ -38,7 +38,7 @@ IMG_URLS = [
PROMPT = "Describe each image in one short sentence." PROMPT = "Describe each image in one short sentence."
def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]: def _create_msg_format(urls: list[str]) -> list[dict[str, Any]]:
return [{ return [{
"role": "role":
"user", "user",
...@@ -54,7 +54,7 @@ def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]: ...@@ -54,7 +54,7 @@ def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]:
}] }]
def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]: def _create_msg_format_hf(urls: list[str]) -> list[dict[str, Any]]:
return [{ return [{
"role": "role":
"user", "user",
...@@ -68,7 +68,7 @@ def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]: ...@@ -68,7 +68,7 @@ def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]:
}] }]
def _create_engine_inputs(urls: List[str]) -> TokensPrompt: def _create_engine_inputs(urls: list[str]) -> TokensPrompt:
msg = _create_msg_format(urls) msg = _create_msg_format(urls)
tokenizer = MistralTokenizer.from_model("pixtral") tokenizer = MistralTokenizer.from_model("pixtral")
...@@ -89,7 +89,7 @@ def _create_engine_inputs(urls: List[str]) -> TokensPrompt: ...@@ -89,7 +89,7 @@ def _create_engine_inputs(urls: List[str]) -> TokensPrompt:
return engine_inputs return engine_inputs
def _create_engine_inputs_hf(urls: List[str]) -> TextPrompt: def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
msg = _create_msg_format_hf(urls) msg = _create_msg_format_hf(urls)
tokenizer = AutoProcessor.from_pretrained("mistral-community/pixtral-12b") tokenizer = AutoProcessor.from_pretrained("mistral-community/pixtral-12b")
...@@ -128,7 +128,7 @@ assert FIXTURES_PATH.exists() ...@@ -128,7 +128,7 @@ assert FIXTURES_PATH.exists()
FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json" FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json"
FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json" FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json"
OutputsLogprobs = List[Tuple[List[int], str, Optional[SampleLogprobs]]] OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
# For the test author to store golden output in JSON # For the test author to store golden output in JSON
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from typing import Any, List, Optional, Tuple, Type, TypedDict, Union from typing import Any, Optional, TypedDict, Union
import numpy.typing as npt import numpy.typing as npt
import pytest import pytest
...@@ -69,21 +69,21 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict): ...@@ -69,21 +69,21 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict):
def batch_make_image_embeddings( def batch_make_image_embeddings(
image_batches: List[Union[Image.Image, List[Image.Image]]], processor, image_batches: list[Union[Image.Image, list[Image.Image]]], processor,
llm: VllmRunner) -> List[Qwen2VLPromptImageEmbeddingInput]: llm: VllmRunner) -> list[Qwen2VLPromptImageEmbeddingInput]:
"""batched image embeddings for Qwen2-VL """batched image embeddings for Qwen2-VL
This will infer all images' embeddings in a single batch, This will infer all images' embeddings in a single batch,
and split the result according to input batches. and split the result according to input batches.
image_batches: image_batches:
- Single-image batches: `List[Image.Image]` - Single-image batches: `list[Image.Image]`
- Multiple-image batches: `List[List[Image.Image]]]` - Multiple-image batches: `list[list[Image.Image]]]`
returns: `List[Qwen2VLPromptImageEmbeddingInput]` returns: `list[Qwen2VLPromptImageEmbeddingInput]`
""" """
image_batches_: List[Any] = image_batches[:] image_batches_: list[Any] = image_batches[:]
# convert single-image batches to multiple-image batches # convert single-image batches to multiple-image batches
for idx in range(len(image_batches_)): for idx in range(len(image_batches_)):
...@@ -93,7 +93,7 @@ def batch_make_image_embeddings( ...@@ -93,7 +93,7 @@ def batch_make_image_embeddings(
assert isinstance(image_batches_[idx], list) assert isinstance(image_batches_[idx], list)
# append all images into a list (as a batch) # append all images into a list (as a batch)
images: List[Image.Image] = [] images: list[Image.Image] = []
for image_batch in image_batches_: for image_batch in image_batches_:
images += image_batch images += image_batch
...@@ -121,7 +121,7 @@ def batch_make_image_embeddings( ...@@ -121,7 +121,7 @@ def batch_make_image_embeddings(
image_embeds = torch.concat(llm.apply_model(get_image_embeds)) image_embeds = torch.concat(llm.apply_model(get_image_embeds))
# split into original batches # split into original batches
result: List[Qwen2VLPromptImageEmbeddingInput] = [] result: list[Qwen2VLPromptImageEmbeddingInput] = []
image_counter = 0 image_counter = 0
embed_counter = 0 embed_counter = 0
for image_batch in image_batches_: for image_batch in image_batches_:
...@@ -153,7 +153,7 @@ def batch_make_image_embeddings( ...@@ -153,7 +153,7 @@ def batch_make_image_embeddings(
def batch_make_video_embeddings( def batch_make_video_embeddings(
video_batches: PromptVideoInput, processor, video_batches: PromptVideoInput, processor,
llm: VllmRunner) -> List[Qwen2VLPromptVideoEmbeddingInput]: llm: VllmRunner) -> list[Qwen2VLPromptVideoEmbeddingInput]:
"""batched video embeddings for Qwen2-VL """batched video embeddings for Qwen2-VL
A NDArray represents a single video's all frames. A NDArray represents a single video's all frames.
...@@ -162,21 +162,21 @@ def batch_make_video_embeddings( ...@@ -162,21 +162,21 @@ def batch_make_video_embeddings(
and split the result according to input batches. and split the result according to input batches.
video_batches: video_batches:
- Single-video batches: `List[NDArray]` - Single-video batches: `list[NDArray]`
- Multiple-video batches: `List[List[NDArray]]` - Multiple-video batches: `list[list[NDArray]]`
""" """
video_batches_: List[Any] = video_batches[:] video_batches_: list[Any] = video_batches[:]
for idx in range(len(video_batches_)): for idx in range(len(video_batches_)):
if not isinstance(video_batches_[idx], list): if not isinstance(video_batches_[idx], list):
single_video_batch: List[npt.NDArray] = [video_batches_[idx]] single_video_batch: list[npt.NDArray] = [video_batches_[idx]]
video_batches_[idx] = single_video_batch video_batches_[idx] = single_video_batch
assert isinstance(video_batches_[idx], list) assert isinstance(video_batches_[idx], list)
# append all videos into a list (as a batch) # append all videos into a list (as a batch)
videos: List[npt.NDArray] = [] videos: list[npt.NDArray] = []
for video_batch in video_batches_: for video_batch in video_batches_:
videos += video_batch videos += video_batch
...@@ -204,7 +204,7 @@ def batch_make_video_embeddings( ...@@ -204,7 +204,7 @@ def batch_make_video_embeddings(
video_embeds = torch.concat(llm.apply_model(get_image_embeds)) video_embeds = torch.concat(llm.apply_model(get_image_embeds))
# split into original batches # split into original batches
result: List[Qwen2VLPromptVideoEmbeddingInput] = [] result: list[Qwen2VLPromptVideoEmbeddingInput] = []
video_counter = 0 video_counter = 0
embed_counter = 0 embed_counter = 0
for video_batch in video_batches_: for video_batch in video_batches_:
...@@ -235,8 +235,8 @@ def batch_make_video_embeddings( ...@@ -235,8 +235,8 @@ def batch_make_video_embeddings(
def run_embedding_input_test( def run_embedding_input_test(
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
inputs: List[Tuple[List[str], PromptImageInput, PromptVideoInput]], inputs: list[tuple[list[str], PromptImageInput, PromptVideoInput]],
model: str, model: str,
*, *,
dtype: str, dtype: str,
...@@ -323,8 +323,8 @@ def test_qwen2_vl_image_embeddings_input(vllm_runner, image_assets, model, ...@@ -323,8 +323,8 @@ def test_qwen2_vl_image_embeddings_input(vllm_runner, image_assets, model,
num_logprobs: int) -> None: num_logprobs: int) -> None:
images = [asset.pil_image for asset in image_assets] images = [asset.pil_image for asset in image_assets]
inputs_per_case: List[Tuple[ inputs_per_case: list[tuple[
List[str], PromptImageInput, PromptVideoInput]] = [( list[str], PromptImageInput, PromptVideoInput]] = [(
[prompt for _ in size_factors], [prompt for _ in size_factors],
[rescale_image_size(image, factor) for factor in size_factors], [rescale_image_size(image, factor) for factor in size_factors],
[], [],
...@@ -365,7 +365,7 @@ def test_qwen2_vl_multiple_image_embeddings_input(vllm_runner, image_assets, ...@@ -365,7 +365,7 @@ def test_qwen2_vl_multiple_image_embeddings_input(vllm_runner, image_assets,
num_logprobs: int) -> None: num_logprobs: int) -> None:
images = [asset.pil_image for asset in image_assets] images = [asset.pil_image for asset in image_assets]
inputs_per_case: List[Tuple[List[str], PromptImageInput, inputs_per_case: list[tuple[list[str], PromptImageInput,
PromptVideoInput]] = [( PromptVideoInput]] = [(
[MULTIIMAGE_PROMPT for _ in size_factors], [MULTIIMAGE_PROMPT for _ in size_factors],
[[ [[
...@@ -413,8 +413,8 @@ def test_qwen2_vl_video_embeddings_input(vllm_runner, video_assets, model, ...@@ -413,8 +413,8 @@ def test_qwen2_vl_video_embeddings_input(vllm_runner, video_assets, model,
for asset in video_assets for asset in video_assets
] ]
inputs_per_case: List[Tuple[ inputs_per_case: list[tuple[
List[str], PromptImageInput, PromptVideoInput]] = [( list[str], PromptImageInput, PromptVideoInput]] = [(
[prompt for _ in size_factors], [prompt for _ in size_factors],
[], [],
[rescale_video_size(video, factor) for factor in size_factors], [rescale_video_size(video, factor) for factor in size_factors],
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
"""Helpers for building inputs that can be leveraged for different test types. """Helpers for building inputs that can be leveraged for different test types.
""" """
from collections.abc import Iterable
from pathlib import PosixPath from pathlib import PosixPath
from typing import Callable, Iterable, List, Optional, Tuple, Union from typing import Callable, Optional, Union
import torch import torch
...@@ -33,7 +34,7 @@ def replace_test_placeholder(prompt: str, img_idx_to_prompt: Callable[[int], ...@@ -33,7 +34,7 @@ def replace_test_placeholder(prompt: str, img_idx_to_prompt: Callable[[int],
def get_model_prompts(base_prompts: Iterable[str], def get_model_prompts(base_prompts: Iterable[str],
img_idx_to_prompt: Optional[Callable[[int], str]], img_idx_to_prompt: Optional[Callable[[int], str]],
video_idx_to_prompt: Optional[Callable[[int], str]], video_idx_to_prompt: Optional[Callable[[int], str]],
prompt_formatter: Callable[[str], str]) -> List[str]: prompt_formatter: Callable[[str], str]) -> list[str]:
"""Given a model-agnostic base prompt and test configuration for a model(s) """Given a model-agnostic base prompt and test configuration for a model(s)
to be tested, update the media placeholders and apply the prompt formatting to be tested, update the media placeholders and apply the prompt formatting
to get the test prompt string for this model. to get the test prompt string for this model.
...@@ -218,7 +219,7 @@ def build_video_inputs_from_test_info( ...@@ -218,7 +219,7 @@ def build_video_inputs_from_test_info(
) for video, prompt in zip(sampled_vids, model_prompts)] ) for video, prompt in zip(sampled_vids, model_prompts)]
def apply_image_size_scaling(image, size: Union[float, Tuple[int, int]], def apply_image_size_scaling(image, size: Union[float, tuple[int, int]],
size_type: SizeType): size_type: SizeType):
"""Applies a size scaler to one image; this can be an image size factor, """Applies a size scaler to one image; this can be an image size factor,
which scales the image while maintaining the aspect ratio""" which scales the image while maintaining the aspect ratio"""
......
...@@ -5,7 +5,7 @@ handling multimodal placeholder substitution, and so on. ...@@ -5,7 +5,7 @@ handling multimodal placeholder substitution, and so on.
""" """
import itertools import itertools
from collections import OrderedDict from collections import OrderedDict
from typing import Dict, Iterable, Tuple from collections.abc import Iterable
import pytest import pytest
...@@ -13,9 +13,9 @@ from .types import (EMBEDDING_SIZE_FACTORS, ExpandableVLMTestArgs, ...@@ -13,9 +13,9 @@ from .types import (EMBEDDING_SIZE_FACTORS, ExpandableVLMTestArgs,
ImageSizeWrapper, SizeType, VLMTestInfo, VLMTestType) ImageSizeWrapper, SizeType, VLMTestInfo, VLMTestType)
def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo], def get_filtered_test_settings(test_settings: dict[str, VLMTestInfo],
test_type: VLMTestType, test_type: VLMTestType,
fork_per_test: bool) -> Dict[str, VLMTestInfo]: fork_per_test: bool) -> dict[str, VLMTestInfo]:
"""Given the dict of potential test settings to run, return a subdict """Given the dict of potential test settings to run, return a subdict
of tests that have the current test type enabled with the matching value for of tests that have the current test type enabled with the matching value for
fork_per_test. fork_per_test.
...@@ -49,7 +49,7 @@ def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo], ...@@ -49,7 +49,7 @@ def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo],
return matching_tests return matching_tests
def get_parametrized_options(test_settings: Dict[str, VLMTestInfo], def get_parametrized_options(test_settings: dict[str, VLMTestInfo],
test_type: VLMTestType, test_type: VLMTestType,
fork_new_process_for_each_test: bool): fork_new_process_for_each_test: bool):
"""Converts all of our VLMTestInfo into an expanded list of parameters. """Converts all of our VLMTestInfo into an expanded list of parameters.
...@@ -121,7 +121,7 @@ def get_parametrized_options(test_settings: Dict[str, VLMTestInfo], ...@@ -121,7 +121,7 @@ def get_parametrized_options(test_settings: Dict[str, VLMTestInfo],
def get_wrapped_test_sizes( def get_wrapped_test_sizes(
test_info: VLMTestInfo, test_info: VLMTestInfo,
test_type: VLMTestType) -> Tuple[ImageSizeWrapper, ...]: test_type: VLMTestType) -> tuple[ImageSizeWrapper, ...]:
"""Given a test info which may have size factors or fixed sizes, wrap them """Given a test info which may have size factors or fixed sizes, wrap them
and combine them into an iterable, each of which will be used in parameter and combine them into an iterable, each of which will be used in parameter
expansion. expansion.
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
"""Core test implementation to be shared across modalities.""" """Core test implementation to be shared across modalities."""
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union from typing import Any, Callable, Optional, Union
import torch import torch
from PIL.Image import Image from PIL.Image import Image
...@@ -17,9 +17,9 @@ from .types import RunnerOutput ...@@ -17,9 +17,9 @@ from .types import RunnerOutput
def run_test( def run_test(
*, *,
hf_runner: Type[HfRunner], hf_runner: type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: type[VllmRunner],
inputs: List[Tuple[List[str], List[Union[List[Image], Image]]]], inputs: list[tuple[list[str], list[Union[list[Image], Image]]]],
model: str, model: str,
dtype: str, dtype: str,
max_tokens: int, max_tokens: int,
...@@ -29,15 +29,15 @@ def run_test( ...@@ -29,15 +29,15 @@ def run_test(
max_num_seqs: int, max_num_seqs: int,
hf_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]], hf_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]], vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
auto_cls: Type[_BaseAutoModelClass], auto_cls: type[_BaseAutoModelClass],
use_tokenizer_eos: bool, use_tokenizer_eos: bool,
postprocess_inputs: Callable[[BatchEncoding], BatchEncoding], postprocess_inputs: Callable[[BatchEncoding], BatchEncoding],
comparator: Callable[..., None], comparator: Callable[..., None],
get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]], get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]],
stop_str: Optional[List[str]], stop_str: Optional[list[str]],
limit_mm_per_prompt: Dict[str, int], limit_mm_per_prompt: dict[str, int],
vllm_runner_kwargs: Optional[Dict[str, Any]], vllm_runner_kwargs: Optional[dict[str, Any]],
hf_model_kwargs: Optional[Dict[str, Any]], hf_model_kwargs: Optional[dict[str, Any]],
patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]], patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
task: TaskOption = "auto", task: TaskOption = "auto",
runner_mm_key: str = "images", runner_mm_key: str = "images",
...@@ -61,7 +61,7 @@ def run_test( ...@@ -61,7 +61,7 @@ def run_test(
# if we run HF first, the cuda initialization will be done and it # if we run HF first, the cuda initialization will be done and it
# will hurt multiprocessing backend with fork method (the default method). # will hurt multiprocessing backend with fork method (the default method).
vllm_runner_kwargs_: Dict[str, Any] = {} vllm_runner_kwargs_: dict[str, Any] = {}
if model_info.tokenizer: if model_info.tokenizer:
vllm_runner_kwargs_["tokenizer"] = model_info.tokenizer vllm_runner_kwargs_["tokenizer"] = model_info.tokenizer
if model_info.tokenizer_mode: if model_info.tokenizer_mode:
...@@ -84,7 +84,7 @@ def run_test( ...@@ -84,7 +84,7 @@ def run_test(
**vllm_runner_kwargs_) as vllm_model: **vllm_runner_kwargs_) as vllm_model:
tokenizer = vllm_model.model.get_tokenizer() tokenizer = vllm_model.model.get_tokenizer()
vllm_kwargs: Dict[str, Any] = {} vllm_kwargs: dict[str, Any] = {}
if get_stop_token_ids is not None: if get_stop_token_ids is not None:
vllm_kwargs["stop_token_ids"] = get_stop_token_ids(tokenizer) vllm_kwargs["stop_token_ids"] = get_stop_token_ids(tokenizer)
if stop_str: if stop_str:
......
...@@ -6,7 +6,7 @@ typically specific to a small subset of models. ...@@ -6,7 +6,7 @@ typically specific to a small subset of models.
import re import re
import types import types
from pathlib import PosixPath from pathlib import PosixPath
from typing import Callable, List, Optional, Tuple, Union from typing import Callable, Optional, Union
import torch import torch
from PIL.Image import Image from PIL.Image import Image
...@@ -49,7 +49,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput, ...@@ -49,7 +49,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput,
def qwen_vllm_to_hf_output( def qwen_vllm_to_hf_output(
vllm_output: RunnerOutput, vllm_output: RunnerOutput,
model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]: model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
"""Sanitize vllm output [qwen models] to be comparable with hf output.""" """Sanitize vllm output [qwen models] to be comparable with hf output."""
output_ids, output_str, out_logprobs = vllm_output output_ids, output_str, out_logprobs = vllm_output
...@@ -60,7 +60,7 @@ def qwen_vllm_to_hf_output( ...@@ -60,7 +60,7 @@ def qwen_vllm_to_hf_output(
def qwen2_vllm_to_hf_output( def qwen2_vllm_to_hf_output(
vllm_output: RunnerOutput, vllm_output: RunnerOutput,
model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]: model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
"""Sanitize vllm output [qwen2 models] to be comparable with hf output.""" """Sanitize vllm output [qwen2 models] to be comparable with hf output."""
output_ids, output_str, out_logprobs = vllm_output output_ids, output_str, out_logprobs = vllm_output
...@@ -78,7 +78,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput, ...@@ -78,7 +78,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,
def llava_video_vllm_to_hf_output( def llava_video_vllm_to_hf_output(
vllm_output: RunnerOutput, vllm_output: RunnerOutput,
model: str) -> Tuple[List[int], str, Optional[SampleLogprobs]]: model: str) -> tuple[list[int], str, Optional[SampleLogprobs]]:
config = AutoConfig.from_pretrained(model) config = AutoConfig.from_pretrained(model)
mm_token_id = config.video_token_index mm_token_id = config.video_token_index
return _llava_vllm_to_hf_output(vllm_output, model, mm_token_id) return _llava_vllm_to_hf_output(vllm_output, model, mm_token_id)
...@@ -247,7 +247,7 @@ def molmo_post_processor(hf_inputs: BatchEncoding, dtype: str): ...@@ -247,7 +247,7 @@ def molmo_post_processor(hf_inputs: BatchEncoding, dtype: str):
####### Prompt path encoders for models that need models on disk ####### Prompt path encoders for models that need models on disk
def qwen_prompt_path_encoder( def qwen_prompt_path_encoder(
tmp_path: PosixPath, prompt: str, assets: Union[List[ImageAsset], tmp_path: PosixPath, prompt: str, assets: Union[list[ImageAsset],
_ImageAssets]) -> str: _ImageAssets]) -> str:
"""Given a temporary dir path, export one or more image assets into the """Given a temporary dir path, export one or more image assets into the
tempdir & replace its contents with the local path to the string so that tempdir & replace its contents with the local path to the string so that
...@@ -257,7 +257,7 @@ def qwen_prompt_path_encoder( ...@@ -257,7 +257,7 @@ def qwen_prompt_path_encoder(
Args: Args:
tmp_path: Tempdir for test under consideration. tmp_path: Tempdir for test under consideration.
prompt: Prompt with image placeholders. prompt: Prompt with image placeholders.
assets: List of image assets whose len equals the num placeholders. assets: list of image assets whose len equals the num placeholders.
""" """
# Ensure that the number of placeholders matches the number of assets; # Ensure that the number of placeholders matches the number of assets;
# If this is not true, the test is probably written incorrectly. # If this is not true, the test is probably written incorrectly.
...@@ -350,7 +350,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner: ...@@ -350,7 +350,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self.max_num = self.config.max_dynamic_patch self.max_num = self.config.max_dynamic_patch
self.image_size = self.vision_config.image_size self.image_size = self.vision_config.image_size
def __call__(self, text: str, images: Union[Image, List[Image]], def __call__(self, text: str, images: Union[Image, list[Image]],
**kwargs): **kwargs):
# yapf: disable # yapf: disable
from vllm.model_executor.models.h2ovl import ( from vllm.model_executor.models.h2ovl import (
...@@ -410,7 +410,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner: ...@@ -410,7 +410,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self.max_num = self.config.max_dynamic_patch self.max_num = self.config.max_dynamic_patch
self.image_size = self.vision_config.image_size self.image_size = self.vision_config.image_size
def __call__(self, text: str, images: Union[Image, List[Image]], def __call__(self, text: str, images: Union[Image, list[Image]],
**kwargs): **kwargs):
from vllm.model_executor.models.internvl import ( from vllm.model_executor.models.internvl import (
IMG_CONTEXT, IMG_END, IMG_START, IMG_CONTEXT, IMG_END, IMG_START,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment