Update deprecated Python 3.8 typing (#13971)

cf069aa8 · Harry Mellor · GitHub · bf33700e · cf069aa8 · cf069aa8
Unverified commit cf069aa8, authored Mar 03, 2025 by Harry Mellor; committed by GitHub Mar 02, 2025.
20 changed files
--- a/tests/models/decoder_only/vision_language/vlm_utils/runners.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/runners.py
@@ -3,7 +3,6 @@
 types / modalities.
 """
 from pathlib import PosixPath
-from typing import Type

 from .....conftest import HfRunner, VllmRunner, _ImageAssets, _VideoAssets
 from . import builders, core
@@ -13,8 +12,8 @@ from .types import ExpandableVLMTestArgs, VLMTestInfo
 ####### Entrypoints for running different test types
 def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
                          test_case: ExpandableVLMTestArgs,
-                          hf_runner: Type[HfRunner],
-                          vllm_runner: Type[VllmRunner],
+                          hf_runner: type[HfRunner],
+                          vllm_runner: type[VllmRunner],
                          image_assets: _ImageAssets):
    assert test_case.size_wrapper is not None
    inputs = builders.build_single_image_inputs_from_test_info(
@@ -36,8 +35,8 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,

 def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
                         test_case: ExpandableVLMTestArgs,
-                         hf_runner: Type[HfRunner],
-                         vllm_runner: Type[VllmRunner],
+                         hf_runner: type[HfRunner],
+                         vllm_runner: type[VllmRunner],
                         image_assets: _ImageAssets):
    assert test_case.size_wrapper is not None
    inputs = builders.build_multi_image_inputs_from_test_info(
@@ -59,8 +58,8 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,

 def run_embedding_test(*, model_test_info: VLMTestInfo,
                       test_case: ExpandableVLMTestArgs,
-                       hf_runner: Type[HfRunner],
-                       vllm_runner: Type[VllmRunner],
+                       hf_runner: type[HfRunner],
+                       vllm_runner: type[VllmRunner],
                       image_assets: _ImageAssets):
    assert test_case.size_wrapper is not None
    inputs, vllm_embeddings = builders.build_embedding_inputs_from_test_info(
@@ -85,8 +84,8 @@ def run_video_test(
    *,
    model_test_info: VLMTestInfo,
    test_case: ExpandableVLMTestArgs,
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    video_assets: _VideoAssets,
 ):
    assert test_case.size_wrapper is not None
@@ -111,8 +110,8 @@ def run_video_test(

 def run_custom_inputs_test(*, model_test_info: VLMTestInfo,
                           test_case: ExpandableVLMTestArgs,
-                           hf_runner: Type[HfRunner],
-                           vllm_runner: Type[VllmRunner]):
+                           hf_runner: type[HfRunner],
+                           vllm_runner: type[VllmRunner]):
    # Custom test cases can provide inputs directly, but they need to
    # explicitly provided a CustomTestConfig, which wraps the inputs and
    # the limit_mm_per_prompt

--- a/tests/models/decoder_only/vision_language/vlm_utils/types.py
+++ b/tests/models/decoder_only/vision_language/vlm_utils/types.py
 # SPDX-License-Identifier: Apache-2.0
 """Types for writing multimodal model tests."""
+from collections.abc import Iterable
 from enum import Enum
 from pathlib import PosixPath
-from typing import (Any, Callable, Dict, Iterable, List, NamedTuple, Optional,
-                    Tuple, Type, Union)
+from typing import Any, Callable, NamedTuple, Optional, Union

 import torch
 from PIL.Image import Image
@@ -35,7 +35,7 @@ VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"

 IMAGE_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
 EMBEDDING_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0)]
-RunnerOutput = Tuple[List[int], str, Optional[SampleLogprobs]]
+RunnerOutput = tuple[list[int], str, Optional[SampleLogprobs]]
 # yapf: enable


@@ -53,8 +53,8 @@ class SizeType(Enum):


 class CustomTestOptions(NamedTuple):
-    inputs: List[Tuple[List[str], List[Union[List[Image], Image]]]]
-    limit_mm_per_prompt: Dict[str, int]
+    inputs: list[tuple[list[str], list[Union[list[Image], Image]]]]
+    limit_mm_per_prompt: dict[str, int]
    # kwarg to pass multimodal data in as to vllm/hf runner instances.
    runner_mm_key: str = "images"

@@ -63,13 +63,13 @@ class ImageSizeWrapper(NamedTuple):
    type: SizeType
    # A size factor is a wrapper of 0+ floats,
    # while a fixed size contains an iterable of integer pairs
-    data: Union[Iterable[float], Iterable[Tuple[int, int]]]
+    data: Union[Iterable[float], Iterable[tuple[int, int]]]


 class VLMTestInfo(NamedTuple):
    """Holds the configuration for 1+ tests for one model architecture."""

-    models: List[str]
+    models: list[str]
    test_type: Union[VLMTestType, Iterable[VLMTestType]]

    # Should be None only if this is a CUSTOM_INPUTS test
@@ -97,19 +97,19 @@ class VLMTestInfo(NamedTuple):
    max_num_seqs: int = 256
    task: TaskOption = "auto"
    tensor_parallel_size: int = 1
-    vllm_runner_kwargs: Optional[Dict[str, Any]] = None
+    vllm_runner_kwargs: Optional[dict[str, Any]] = None

    # Optional callable which gets a list of token IDs from the model tokenizer
    get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]] = None
    # Optional list of strings to stop generation, useful when stop tokens are
    # not special tokens in the tokenizer
-    stop_str: Optional[List[str]] = None
+    stop_str: Optional[list[str]] = None

    # Exposed options for HF runner
-    hf_model_kwargs: Optional[Dict[str, Any]] = None
+    hf_model_kwargs: Optional[dict[str, Any]] = None
    # Indicates we should explicitly pass the EOS from the tokenizer
    use_tokenizer_eos: bool = False
-    auto_cls: Type[_BaseAutoModelClass] = AutoModelForCausalLM
+    auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM
    # Callable to pass to the HF runner to run on inputs; for now, we also pass
    # the data type to input post processing, because almost all of the uses of
    # postprocess_inputs are to fix the data types of BatchEncoding values.
@@ -128,12 +128,12 @@ class VLMTestInfo(NamedTuple):
    # Default expandable params per test; these defaults can be overridden in
    # instances of this object; the complete set of test cases for the model
    # is all combinations of .models + all fields below
-    max_tokens: Union[int, Tuple[int]] = 128
-    num_logprobs: Union[int, Tuple[int]] = 5
+    max_tokens: Union[int, tuple[int]] = 128
+    num_logprobs: Union[int, tuple[int]] = 5
    dtype: Union[str, Iterable[str]] = "half"
    distributed_executor_backend: Optional[Union[str, Iterable[str]]] = None
    # Only expanded in video tests
-    num_video_frames: Union[int, Tuple[int]] = 16
+    num_video_frames: Union[int, tuple[int]] = 16

    # Fixed image sizes / image size factors; most tests use image_size_factors
    # The values provided for these two fields will be stacked and expanded
@@ -141,19 +141,19 @@ class VLMTestInfo(NamedTuple):
    # once per tests (much like concatenating and wrapping in one parametrize
    # call)
    image_size_factors: Iterable[Iterable[float]] = IMAGE_SIZE_FACTORS
-    image_sizes: Optional[Iterable[Iterable[Tuple[int, int]]]] = None
+    image_sizes: Optional[Iterable[Iterable[tuple[int, int]]]] = None

    # Hack for updating a prompt to take into a local path; currently only used
    # for Qwen-VL, which requires encoding the image path / url into the prompt
    # for HF runner
    prompt_path_encoder: Optional[
-        Callable[[PosixPath, str, Union[List[ImageAsset], _ImageAssets]],
+        Callable[[PosixPath, str, Union[list[ImageAsset], _ImageAssets]],
                 str]] = None  # noqa: E501

    # Allows configuring a test to run with custom inputs
-    custom_test_opts: Optional[List[CustomTestOptions]] = None
+    custom_test_opts: Optional[list[CustomTestOptions]] = None

-    marks: Optional[List[MarkDecorator]] = None
+    marks: Optional[list[MarkDecorator]] = None

    def get_non_parametrized_runner_kwargs(self):
        """Returns a dictionary of expandable kwargs for items that are used

--- a/tests/models/embedding/language/test_gritlm.py
+++ b/tests/models/embedding/language/test_gritlm.py
@@ -3,7 +3,6 @@
 import importlib.util
 import math
 from array import array
-from typing import List

 import openai
 import pytest
@@ -81,14 +80,14 @@ async def client_generate(server_generate: RemoteOpenAIServer):
        yield async_client


-def run_llm_encode(llm: vllm.LLM, queries: List[str],
-                   instruction: str) -> List[float]:
+def run_llm_encode(llm: vllm.LLM, queries: list[str],
+                   instruction: str) -> list[float]:
    outputs = llm.encode([instruction + q for q in queries], )
    return [output.outputs.embedding for output in outputs]


-async def run_client_embeddings(client: vllm.LLM, queries: List[str],
-                                instruction: str) -> List[float]:
+async def run_client_embeddings(client: vllm.LLM, queries: list[str],
+                                instruction: str) -> list[float]:
    outputs = await client.embeddings.create(
        model=MODEL_NAME,
        input=[instruction + q for q in queries],
@@ -123,7 +122,7 @@ def get_test_data():
    return queries, q_instruction, documents, d_instruction


-def validate_embed_output(q_rep: List[float], d_rep: List[float]):
+def validate_embed_output(q_rep: list[float], d_rep: list[float]):
    cosine_sim_q0_d0 = 1 - cosine(q_rep[0], d_rep[0])
    assert math.isclose(cosine_sim_q0_d0, 0.609, abs_tol=0.001)


--- a/tests/models/embedding/utils.py
+++ b/tests/models/embedding/utils.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import List, Sequence
+from collections.abc import Sequence

 import torch
 import torch.nn.functional as F
@@ -8,8 +8,8 @@ import torch.nn.functional as F

 def check_embeddings_close(
    *,
-    embeddings_0_lst: Sequence[List[float]],
-    embeddings_1_lst: Sequence[List[float]],
+    embeddings_0_lst: Sequence[list[float]],
+    embeddings_1_lst: Sequence[list[float]],
    name_0: str,
    name_1: str,
    tol: float = 1e-3,

--- a/tests/models/embedding/vision_language/test_dse_qwen2_vl.py
+++ b/tests/models/embedding/vision_language/test_dse_qwen2_vl.py
 # SPDX-License-Identifier: Apache-2.0

 from functools import partial
-from typing import Callable, Dict, List, Type
+from typing import Callable

 import pytest
 import torch
@@ -67,7 +67,7 @@ def get_messages(image: Image.Image, text: str, embed_text: bool):


 def apply_chat_template_and_add_eos(
-    messages: List[Dict],
+    messages: list[dict],
    apply_chat_template_fn: Callable,
 ):
    prompt = apply_chat_template_fn(
@@ -80,11 +80,11 @@ def postprocess_inputs(hf_model: HfRunner, inputs: BatchEncoding, **kwargs):


 def _run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
-    input_texts: List[str],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    input_texts: list[str],
    input_images: PromptImageInput,
-    embed_texts: List[bool],
+    embed_texts: list[bool],
    model: str,
    *,
    dtype: str,

--- a/tests/models/embedding/vision_language/test_llava_next.py
+++ b/tests/models/embedding/vision_language/test_llava_next.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import List, Type
-
 import pytest
 import torch.nn.functional as F
 from transformers import AutoModelForVision2Seq
@@ -35,9 +33,9 @@ MODELS = ["royokong/e5-v"]


 def _run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
-    input_texts: List[str],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    input_texts: list[str],
    input_images: PromptImageInput,
    model: str,
    *,

--- a/tests/models/embedding/vision_language/test_phi3v.py
+++ b/tests/models/embedding/vision_language/test_phi3v.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import List, Type
-
 import pytest
 import torch.nn.functional as F

@@ -29,9 +27,9 @@ MODELS = ["TIGER-Lab/VLM2Vec-Full"]


 def _run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
-    input_texts: List[str],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    input_texts: list[str],
    input_images: PromptImageInput,
    model: str,
    *,

--- a/tests/models/encoder_decoder/language/test_bart.py
+++ b/tests/models/encoder_decoder/language/test_bart.py
@@ -3,7 +3,7 @@

 Run `pytest tests/models/encoder_decoder/language/test_bart.py`.
 """
-from typing import List, Optional, Tuple, Type
+from typing import Optional

 import pytest
 from transformers import AutoModelForSeq2SeqLM
@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close


 def vllm_to_hf_output(
-    vllm_output: Tuple[List[int], str, Optional[SampleLogprobs]],
+    vllm_output: tuple[list[int], str, Optional[SampleLogprobs]],
    decoder_prompt_type: DecoderPromptType,
 ):
    """Sanitize vllm output to be comparable with hf output."""
@@ -31,9 +31,9 @@ def vllm_to_hf_output(


 def run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
-    prompts: List[ExplicitEncoderDecoderPrompt[str, str]],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    prompts: list[ExplicitEncoderDecoderPrompt[str, str]],
    decoder_prompt_type: DecoderPromptType,
    model: str,
    *,

--- a/tests/models/encoder_decoder/vision_language/test_florence2.py
+++ b/tests/models/encoder_decoder/vision_language/test_florence2.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import Optional, Type
+from typing import Optional

 import pytest
 from PIL import Image
@@ -51,8 +51,8 @@ def hf_to_vllm_output(hf_output: tuple[list[int], str,


 def run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    inputs: list[list[ExplicitEncoderDecoderPrompt]],
    model: str,
    *,
@@ -114,7 +114,7 @@ def run_test(
 @pytest.mark.parametrize("dtype", ["float"])
 @pytest.mark.parametrize("max_tokens", [64])
 @pytest.mark.parametrize("num_logprobs", [5])
-def test_models(hf_runner: Type[HfRunner], vllm_runner: Type[VllmRunner],
+def test_models(hf_runner: type[HfRunner], vllm_runner: type[VllmRunner],
                image_assets: _ImageAssets, model: str,
                size_factors: list[int], dtype: str, max_tokens: int,
                num_logprobs: int) -> None:

--- a/tests/models/encoder_decoder/vision_language/test_mllama.py
+++ b/tests/models/encoder_decoder/vision_language/test_mllama.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import List, Optional, Tuple, Type, overload
+from typing import Optional, overload

 import pytest
 import torch
@@ -64,7 +64,7 @@ prompt_data = {
 }


-def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
+def vllm_to_hf_output(vllm_output: tuple[list[int], str,
                                         Optional[SampleLogprobs]],
                      model: str):
    """Sanitize vllm output to be comparable with hf output."""
@@ -91,9 +91,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
 def _get_inputs(
    image_assets: _ImageAssets,
    *,
-    size_factors: Optional[List[float]] = None,
-    sizes: Optional[List[Tuple[int, int]]] = None,
-) -> List[Tuple[List[str], PromptImageInput]]:
+    size_factors: Optional[list[float]] = None,
+    sizes: Optional[list[tuple[int, int]]] = None,
+) -> list[tuple[list[str], PromptImageInput]]:
    images = [asset.pil_image for asset in image_assets]

    if size_factors is not None:
@@ -123,12 +123,12 @@ def _get_inputs(

 @overload
 def run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    image_assets: _ImageAssets,
    model: str,
    *,
-    size_factors: List[float],
+    size_factors: list[float],
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
@@ -140,12 +140,12 @@ def run_test(

 @overload
 def run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    image_assets: _ImageAssets,
    model: str,
    *,
-    sizes: List[Tuple[int, int]],
+    sizes: list[tuple[int, int]],
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
@@ -156,13 +156,13 @@ def run_test(


 def run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    image_assets: _ImageAssets,
    model: str,
    *,
-    size_factors: Optional[List[float]] = None,
-    sizes: Optional[List[Tuple[int, int]]] = None,
+    size_factors: Optional[list[float]] = None,
+    sizes: Optional[list[tuple[int, int]]] = None,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
@@ -183,9 +183,9 @@ def run_test(


 def _run_test(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
-    inputs: List[Tuple[List[str], PromptImageInput]],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
+    inputs: list[tuple[list[str], PromptImageInput]],
    model: str,
    *,
    dtype: str,

--- a/tests/models/multimodal/processing/test_h2ovl.py
+++ b/tests/models/multimodal/processing/test_h2ovl.py
 # SPDX-License-Identifier: Apache-2.0
 """Tests for H2OVL's multimodal preprocessing kwargs."""
-from typing import Mapping, Optional
+from collections.abc import Mapping
+from typing import Optional

 import pytest
 from PIL import Image

--- a/tests/models/multimodal/processing/test_internvl.py
+++ b/tests/models/multimodal/processing/test_internvl.py
 # SPDX-License-Identifier: Apache-2.0
 """Tests for InternVL's multimodal preprocessing kwargs."""
-from typing import Mapping, Optional
+from collections.abc import Mapping
+from typing import Optional

 import pytest
 from PIL import Image

--- a/tests/models/registry.py
+++ b/tests/models/registry.py
 # SPDX-License-Identifier: Apache-2.0

+from collections.abc import Mapping, Set
 from dataclasses import dataclass, field
-from typing import AbstractSet, Any, Literal, Mapping, Optional
+from typing import Any, Literal, Optional

 import pytest
 from packaging.version import Version
@@ -324,7 +325,7 @@ class HfExampleModels:

        self.hf_models = hf_models

-    def get_supported_archs(self) -> AbstractSet[str]:
+    def get_supported_archs(self) -> Set[str]:
        return self.hf_models.keys()

    def get_hf_info(self, model_arch: str) -> _HfExamplesInfo:

--- a/tests/models/test_transformers.py
+++ b/tests/models/test_transformers.py
@@ -4,7 +4,6 @@
 Run `pytest tests/models/test_transformers.py`.
 """
 from contextlib import nullcontext
-from typing import Type

 import pytest

@@ -14,8 +13,8 @@ from .utils import check_logprobs_close


 def check_implementation(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    example_prompts: list[str],
    model: str,
    **kwargs,
@@ -47,8 +46,8 @@ def check_implementation(
        ("ArthurZ/Ilama-3.2-1B", "auto"),  # CUSTOM CODE
    ])  # trust_remote_code=True by default
 def test_models(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    example_prompts: list[str],
    model: str,
    model_impl: str,
@@ -71,8 +70,8 @@ def test_models(

 @multi_gpu_test(num_gpus=2)
 def test_distributed(
-    hf_runner: Type[HfRunner],
-    vllm_runner: Type[VllmRunner],
+    hf_runner: type[HfRunner],
+    vllm_runner: type[VllmRunner],
    example_prompts,
 ):
    kwargs = {"model_impl": "transformers", "tensor_parallel_size": 2}
@@ -92,7 +91,7 @@ def test_distributed(
 @pytest.mark.parametrize("max_tokens", [32])
 @pytest.mark.parametrize("num_logprobs", [5])
 def test_quantization(
-    vllm_runner: Type[VllmRunner],
+    vllm_runner: type[VllmRunner],
    example_prompts: list[str],
    model: str,
    quantization_kwargs: dict[str, str],

--- a/tests/models/utils.py
+++ b/tests/models/utils.py
 # SPDX-License-Identifier: Apache-2.0

 import warnings
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from collections.abc import Sequence
+from typing import Optional, Union

 import torch

@@ -9,7 +10,7 @@ from vllm.config import ModelConfig, TaskOption
 from vllm.inputs import InputContext
 from vllm.sequence import Logprob, PromptLogprobs, SampleLogprobs

-TokensText = Tuple[List[int], str]
+TokensText = tuple[list[int], str]


 def check_outputs_equal(
@@ -46,7 +47,7 @@ def check_outputs_equal(
 # * List of top sample logprobs for each sampled token
 #
 # Assumes prompt logprobs were not requested.
-TokensTextLogprobs = Tuple[List[int], str, Optional[Union[List[Dict[int,
+TokensTextLogprobs = tuple[list[int], str, Optional[Union[list[dict[int,
                                                                    float]],
                                                          SampleLogprobs]]]

@@ -57,8 +58,8 @@ TokensTextLogprobs = Tuple[List[int], str, Optional[Union[List[Dict[int,
 # * Optional list of top sample logprobs for each sampled token
 #
 # Assumes prompt logprobs were not requested.
-TextTextLogprobs = Tuple[List[str], str, Optional[Union[List[Dict[str, float]],
-                                                        List[Dict[str,
+TextTextLogprobs = tuple[list[str], str, Optional[Union[list[dict[str, float]],
+                                                        list[dict[str,
                                                                  Logprob]]]]]

 # Representation of generated sequence as a tuple of
@@ -68,9 +69,9 @@ TextTextLogprobs = Tuple[List[str], str, Optional[Union[List[Dict[str, float]],
 # * Optional list of top prompt logprobs for each prompt token
 #
 # Allows prompt logprobs to be requested.
-TokensTextLogprobsPromptLogprobs = Tuple[
-    List[int], str, Optional[Union[List[Dict[int, float]], SampleLogprobs]],
-    Optional[Union[List[Optional[Dict[int, float]]], PromptLogprobs]]]
+TokensTextLogprobsPromptLogprobs = tuple[
+    list[int], str, Optional[Union[list[dict[int, float]], SampleLogprobs]],
+    Optional[Union[list[Optional[dict[int, float]]], PromptLogprobs]]]


 def check_logprobs_close(
@@ -254,8 +255,8 @@ def build_model_context(
    tokenizer_name: Optional[str] = None,
    trust_remote_code: bool = False,
    dtype: Optional[Union[str, torch.dtype]] = None,
-    mm_processor_kwargs: Optional[Dict] = None,
-    limit_mm_per_prompt: Optional[Dict] = None,
+    mm_processor_kwargs: Optional[dict] = None,
+    limit_mm_per_prompt: Optional[dict] = None,
    disable_mm_preprocessor_cache: bool = True,
 ):
    """Creates an InputContext for a given model.

--- a/tests/mq_llm_engine/utils.py
+++ b/tests/mq_llm_engine/utils.py
@@ -2,7 +2,7 @@

 import asyncio
 import multiprocessing
-from typing import Callable, Tuple, Union
+from typing import Callable, Union

 from vllm import SamplingParams
 from vllm.engine.arg_utils import AsyncEngineArgs
@@ -16,7 +16,7 @@ async def generate(
        client: MQLLMEngineClient,
        request_id: str,
        num_tokens: int,
-        return_output: bool = False) -> Union[RequestOutput, Tuple[int, str]]:
+        return_output: bool = False) -> Union[RequestOutput, tuple[int, str]]:

    final_output = None
    count = 0

--- a/tests/multi_step/test_correctness_async_llm.py
+++ b/tests/multi_step/test_correctness_async_llm.py
 # SPDX-License-Identifier: Apache-2.0

 # Test the AsyncLLMEngine with multi-step-decoding
-from typing import List, Optional
+from typing import Optional

 import pytest

@@ -17,7 +17,7 @@ MODELS = [
 NUM_SCHEDULER_STEPS = [8]  # Multi-step decoding steps
 NUM_PROMPTS = [10]

-DEFAULT_SERVER_ARGS: List[str] = [
+DEFAULT_SERVER_ARGS: list[str] = [
    "--distributed-executor-backend",
    "ray",
    "--gpu-memory-utilization",

--- a/tests/multimodal/test_utils.py
+++ b/tests/multimodal/test_utils.py
@@ -4,7 +4,7 @@ import base64
 import mimetypes
 import os
 from tempfile import NamedTemporaryFile, TemporaryDirectory
-from typing import TYPE_CHECKING, Dict, NamedTuple, Optional, Tuple
+from typing import TYPE_CHECKING, NamedTuple, Optional

 import numpy as np
 import pytest
@@ -30,7 +30,7 @@ TEST_IMAGE_URLS = [


 @pytest.fixture(scope="module")
-def url_images() -> Dict[str, Image.Image]:
+def url_images() -> dict[str, Image.Image]:
    connector = MediaConnector()

    return {
@@ -39,7 +39,7 @@ def url_images() -> Dict[str, Image.Image]:
    }


-def get_supported_suffixes() -> Tuple[str, ...]:
+def get_supported_suffixes() -> tuple[str, ...]:
    # We should at least test the file types mentioned in GPT-4 with Vision
    OPENAI_SUPPORTED_SUFFIXES = ('.png', '.jpeg', '.jpg', '.webp', '.gif')

@@ -66,7 +66,7 @@ async def test_fetch_image_http(image_url: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
 @pytest.mark.parametrize("suffix", get_supported_suffixes())
-async def test_fetch_image_base64(url_images: Dict[str, Image.Image],
+async def test_fetch_image_base64(url_images: dict[str, Image.Image],
                                  image_url: str, suffix: str):
    connector = MediaConnector()
    url_image = url_images[image_url]

--- a/tests/neuron/test_logits_processor.py
+++ b/tests/neuron/test_logits_processor.py
 # SPDX-License-Identifier: Apache-2.0

 import random
-from typing import Tuple
 from unittest.mock import patch

 import pytest
@@ -33,7 +32,7 @@ class MockLogitsProcessor(LogitsProcessor):

 def _prepare_test(
        batch_size: int
-) -> Tuple[torch.Tensor, torch.Tensor, MockLogitsProcessor]:
+) -> tuple[torch.Tensor, torch.Tensor, MockLogitsProcessor]:
    vocab_size = 32000
    input_tensor = torch.rand((batch_size, 1024), dtype=torch.float16)
    fake_logits = torch.full((batch_size, vocab_size),

--- a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
+++ b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_gemma_embedding.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import Iterable, Optional, Tuple, Union
+from collections.abc import Iterable
+from typing import Optional, Union

 import torch
 import torch.nn as nn
@@ -59,7 +60,7 @@ class MyGemma2Embedding(nn.Module):
    ) -> Optional[PoolerOutput]:
        return self._pooler(hidden_states, pooling_metadata)

-    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):

        weights = self.hf_to_vllm_mapper.apply(weights)
        weights = ((name, data) for name, data in weights