Update deprecated Python 3.8 typing (#13971)

cf069aa8 · Harry Mellor · GitHub · bf33700e · cf069aa8 · cf069aa8
Commit cf069aa8 (unverified), authored Mar 03, 2025 by Harry Mellor; committed by GitHub Mar 02, 2025
20 changed files
--- a/tests/engine/test_executor.py
+++ b/tests/engine/test_executor.py
@@ -2,7 +2,7 @@

 import asyncio
 import os
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Optional, Union

 import pytest

@@ -22,8 +22,8 @@ class CustomUniExecutor(UniProcExecutor):
    def collective_rpc(self,
                       method: Union[str, Callable],
                       timeout: Optional[float] = None,
-                       args: Tuple = (),
-                       kwargs: Optional[Dict] = None) -> List[Any]:
+                       args: tuple = (),
+                       kwargs: Optional[dict] = None) -> list[Any]:
        # Drop marker to show that this was ran
        with open(".marker", "w"):
            ...

--- a/tests/engine/test_multiproc_workers.py
+++ b/tests/engine/test_multiproc_workers.py
@@ -4,7 +4,7 @@ import asyncio
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from time import sleep
-from typing import Any, List, Tuple
+from typing import Any

 import pytest

@@ -17,7 +17,7 @@ from vllm.worker.worker_base import WorkerWrapperBase
 class DummyWorkerWrapper(WorkerWrapperBase):
    """Dummy version of vllm.worker.worker.Worker"""

-    def worker_method(self, worker_input: Any) -> Tuple[int, Any]:
+    def worker_method(self, worker_input: Any) -> tuple[int, Any]:
        sleep(0.05)

        if isinstance(worker_input, Exception):
@@ -27,7 +27,7 @@ class DummyWorkerWrapper(WorkerWrapperBase):
        return self.rpc_rank, input


-def _start_workers() -> Tuple[List[ProcessWorkerWrapper], WorkerMonitor]:
+def _start_workers() -> tuple[list[ProcessWorkerWrapper], WorkerMonitor]:
    result_handler = ResultHandler()
    vllm_config = VllmConfig()
    workers = [

--- a/tests/engine/test_stop_strings.py
+++ b/tests/engine/test_stop_strings.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import Any, List, Optional
+from typing import Any, Optional

 import pytest

@@ -21,8 +21,8 @@ def vllm_model(vllm_runner):
 def _test_stopping(llm_engine: LLMEngine,
                   expected_output: str,
                   expected_reason: Any,
-                   stop: Optional[List[str]] = None,
-                   stop_token_ids: Optional[List[int]] = None,
+                   stop: Optional[list[str]] = None,
+                   stop_token_ids: Optional[list[int]] = None,
                   include_in_output: bool = False,
                   use_async_output_proc: bool = False) -> None:
    llm_engine.add_request(

--- a/tests/entrypoints/llm/test_chat.py
+++ b/tests/entrypoints/llm/test_chat.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import List
-
 import pytest

 from vllm import LLM
@@ -63,7 +61,7 @@ def test_multi_chat():

 @pytest.mark.parametrize("image_urls",
                         [[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]])
-def test_chat_multi_image(image_urls: List[str]):
+def test_chat_multi_image(image_urls: list[str]):
    llm = LLM(
        model="microsoft/Phi-3.5-vision-instruct",
        dtype="bfloat16",

--- a/tests/entrypoints/llm/test_encode.py
+++ b/tests/entrypoints/llm/test_encode.py
 # SPDX-License-Identifier: Apache-2.0

 import weakref
-from typing import List

 import pytest

@@ -45,8 +44,8 @@ def llm():
    cleanup_dist_env_and_memory()


-def assert_outputs_equal(o1: List[PoolingRequestOutput],
-                         o2: List[PoolingRequestOutput]):
+def assert_outputs_equal(o1: list[PoolingRequestOutput],
+                         o2: list[PoolingRequestOutput]):
    assert [o.outputs for o in o1] == [o.outputs for o in o2]



--- a/tests/entrypoints/llm/test_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
 # SPDX-License-Identifier: Apache-2.0

 import weakref
-from typing import List

 import pytest

@@ -43,7 +42,7 @@ def llm():
    cleanup_dist_env_and_memory()


-def assert_outputs_equal(o1: List[RequestOutput], o2: List[RequestOutput]):
+def assert_outputs_equal(o1: list[RequestOutput], o2: list[RequestOutput]):
    assert [o.outputs for o in o1] == [o.outputs for o in o2]



--- a/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
+++ b/tests/entrypoints/openai/correctness/test_transcription_api_correctness.py
@@ -10,7 +10,6 @@ import asyncio
 import io
 import time
 from statistics import mean, median
-from typing import List

 import librosa
 import pytest
@@ -67,7 +66,7 @@ async def process_dataset(model, client, data, concurrent_request):
    audio, sr = data[0]["audio"]["array"], data[0]["audio"]["sampling_rate"]
    _ = await bound_transcribe(model, sem, client, (audio, sr), "")

-    tasks: List[asyncio.Task] = []
+    tasks: list[asyncio.Task] = []
    for sample in data:
        audio, sr = sample["audio"]["array"], sample["audio"]["sampling_rate"]
        task = asyncio.create_task(

--- a/tests/entrypoints/openai/reasoning_parsers/test_deepseekr1_reasoning_parser.py
+++ b/tests/entrypoints/openai/reasoning_parsers/test_deepseekr1_reasoning_parser.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import List
-
 import pytest
 from transformers import AutoTokenizer

@@ -180,7 +178,7 @@ def test_reasoning(
 ):
    output = tokenizer.tokenize(param_dict["output"])
    # decode everything to tokens
-    output_tokens: List[str] = [
+    output_tokens: list[str] = [
        tokenizer.convert_tokens_to_string([token]) for token in output
    ]
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(

--- a/tests/entrypoints/openai/reasoning_parsers/utils.py
+++ b/tests/entrypoints/openai/reasoning_parsers/utils.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union

 from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                              DeltaMessage)
@@ -33,10 +33,10 @@ class StreamingReasoningReconstructor:

 def run_reasoning_extraction(
    reasoning_parser: ReasoningParser,
-    model_output: List[str],
+    model_output: list[str],
    request: Union[ChatCompletionRequest, None] = None,
    streaming: bool = False,
-) -> Tuple[Optional[str], Optional[str]]:
+) -> tuple[Optional[str], Optional[str]]:
    if streaming:
        reconstructor = run_reasoning_extraction_streaming(
            reasoning_parser,
@@ -55,9 +55,9 @@ def run_reasoning_extraction(

 def run_reasoning_extraction_nonstreaming(
    reasoning_parser: ReasoningParser,
-    model_output: List[str],
+    model_output: list[str],
    request: Union[ChatCompletionRequest, None] = None,
-) -> Tuple[Optional[str], Optional[str]]:
+) -> tuple[Optional[str], Optional[str]]:
    request = request or ChatCompletionRequest(messages=[], model="test-model")
    return reasoning_parser.extract_reasoning_content(
        model_output=''.join(model_output), request=request)
@@ -65,13 +65,13 @@ def run_reasoning_extraction_nonstreaming(

 def run_reasoning_extraction_streaming(
    reasoning_parser: ReasoningParser,
-    model_deltas: List[str],
+    model_deltas: list[str],
    request: Union[ChatCompletionRequest, None] = None,
 ) -> StreamingReasoningReconstructor:
    request = request or ChatCompletionRequest(messages=[], model="test-model")
    reconstructor = StreamingReasoningReconstructor()
    previous_text = ""
-    previous_tokens: List[int] = []
+    previous_tokens: list[int] = []
    for delta in model_deltas:
        token_delta = [
            reasoning_parser.vocab.get(token)

--- a/tests/entrypoints/openai/test_audio.py
+++ b/tests/entrypoints/openai/test_audio.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import Dict, List
-
 import openai
 import pytest
 import pytest_asyncio
@@ -41,7 +39,7 @@ async def client(server):


 @pytest.fixture(scope="session")
-def base64_encoded_audio() -> Dict[str, str]:
+def base64_encoded_audio() -> dict[str, str]:
    return {
        audio_url: encode_audio_base64(*fetch_audio(audio_url))
        for audio_url in TEST_AUDIO_URLS
@@ -107,7 +105,7 @@ async def test_single_chat_session_audio(client: openai.AsyncOpenAI,
 @pytest.mark.parametrize("audio_url", TEST_AUDIO_URLS)
 async def test_single_chat_session_audio_base64encoded(
        client: openai.AsyncOpenAI, model_name: str, audio_url: str,
-        base64_encoded_audio: Dict[str, str]):
+        base64_encoded_audio: dict[str, str]):

    messages = [{
        "role":
@@ -165,7 +163,7 @@ async def test_single_chat_session_audio_base64encoded(
 @pytest.mark.parametrize("audio_url", TEST_AUDIO_URLS)
 async def test_single_chat_session_input_audio(
        client: openai.AsyncOpenAI, model_name: str, audio_url: str,
-        base64_encoded_audio: Dict[str, str]):
+        base64_encoded_audio: dict[str, str]):
    messages = [{
        "role":
        "user",
@@ -255,7 +253,7 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI,
        temperature=0.0,
        stream=True,
    )
-    chunks: List[str] = []
+    chunks: list[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        delta = chunk.choices[0].delta
@@ -277,7 +275,7 @@ async def test_chat_streaming_audio(client: openai.AsyncOpenAI,
 @pytest.mark.parametrize("audio_url", TEST_AUDIO_URLS)
 async def test_chat_streaming_input_audio(client: openai.AsyncOpenAI,
                                          model_name: str, audio_url: str,
-                                          base64_encoded_audio: Dict[str,
+                                          base64_encoded_audio: dict[str,
                                                                     str]):
    messages = [{
        "role":
@@ -315,7 +313,7 @@ async def test_chat_streaming_input_audio(client: openai.AsyncOpenAI,
        temperature=0.0,
        stream=True,
    )
-    chunks: List[str] = []
+    chunks: list[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        delta = chunk.choices[0].delta
@@ -337,7 +335,7 @@ async def test_chat_streaming_input_audio(client: openai.AsyncOpenAI,
 @pytest.mark.parametrize("audio_url", TEST_AUDIO_URLS)
 async def test_multi_audio_input(client: openai.AsyncOpenAI, model_name: str,
                                 audio_url: str,
-                                 base64_encoded_audio: Dict[str, str]):
+                                 base64_encoded_audio: dict[str, str]):

    messages = [{
        "role":

--- a/tests/entrypoints/openai/test_basic.py
+++ b/tests/entrypoints/openai/test_basic.py
@@ -2,7 +2,6 @@

 import asyncio
 from http import HTTPStatus
-from typing import List

 import openai
 import pytest
@@ -17,7 +16,7 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"


 @pytest.fixture(scope='module')
-def server_args(request: pytest.FixtureRequest) -> List[str]:
+def server_args(request: pytest.FixtureRequest) -> list[str]:
    """ Provide extra arguments to the server via indirect parametrization

    Usage:

--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -3,7 +3,7 @@
 # imports for guided decoding tests
 import json
 import re
-from typing import Dict, List, Optional
+from typing import Optional

 import jsonschema
 import openai  # use the official client for correctness check
@@ -190,7 +190,7 @@ async def test_too_many_chat_logprobs(client: openai.AsyncOpenAI,
 async def test_prompt_logprobs_chat(client: openai.AsyncOpenAI,
                                    model_name: str,
                                    prompt_logprobs: Optional[int]):
-    params: Dict = {
+    params: dict = {
        "messages": [{
            "role": "system",
            "content": "You are a helpful assistant."
@@ -232,7 +232,7 @@ async def test_prompt_logprobs_chat(client: openai.AsyncOpenAI,
 )
 async def test_more_than_one_prompt_logprobs_chat(client: openai.AsyncOpenAI,
                                                  model_name: str):
-    params: Dict = {
+    params: dict = {
        "messages": [{
            "role": "system",
            "content": "You are a helpful assistant."
@@ -343,7 +343,7 @@ async def test_chat_streaming(client: openai.AsyncOpenAI, model_name: str):
        temperature=0.0,
        stream=True,
    )
-    chunks: List[str] = []
+    chunks: list[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        delta = chunk.choices[0].delta

--- a/tests/entrypoints/openai/test_completion.py
+++ b/tests/entrypoints/openai/test_completion.py
@@ -5,7 +5,7 @@ import json
 import re
 import shutil
 from tempfile import TemporaryDirectory
-from typing import Dict, List, Optional
+from typing import Optional

 import jsonschema
 import openai  # use the official client for correctness check
@@ -287,7 +287,7 @@ async def test_too_many_completion_logprobs(client: openai.AsyncOpenAI,
 async def test_prompt_logprobs_completion(client: openai.AsyncOpenAI,
                                          model_name: str,
                                          prompt_logprobs: Optional[int]):
-    params: Dict = {
+    params: dict = {
        "prompt": ["A robot may not injure another robot", "My name is"],
        "model": model_name,
    }
@@ -331,7 +331,7 @@ async def test_completion_streaming(client: openai.AsyncOpenAI,
                                             max_tokens=5,
                                             temperature=0.0,
                                             stream=True)
-    chunks: List[str] = []
+    chunks: list[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        chunks.append(chunk.choices[0].text)
@@ -364,7 +364,7 @@ async def test_parallel_streaming(client: openai.AsyncOpenAI, model_name: str):
                                             max_tokens=max_tokens,
                                             n=n,
                                             stream=True)
-    chunks: List[List[str]] = [[] for i in range(n)]
+    chunks: list[list[str]] = [[] for i in range(n)]
    finish_reason_count = 0
    async for chunk in stream:
        index = chunk.choices[0].index

--- a/tests/entrypoints/openai/test_embedding.py
+++ b/tests/entrypoints/openai/test_embedding.py
@@ -86,7 +86,7 @@ async def test_single_embedding(client: openai.AsyncOpenAI, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str):
-    # test List[str]
+    # test list[str]
    input_texts = [
        "The cat sat on the mat.", "A feline was resting on a rug.",
        "Stars twinkle brightly in the night sky."
@@ -106,7 +106,7 @@ async def test_batch_embedding(client: openai.AsyncOpenAI, model_name: str):
    assert embeddings.usage.prompt_tokens == 33
    assert embeddings.usage.total_tokens == 33

-    # test List[List[int]]
+    # test list[list[int]]
    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
                    [25, 32, 64, 77]]
    embedding_response = await client.embeddings.create(

--- a/tests/entrypoints/openai/test_pooling.py
+++ b/tests/entrypoints/openai/test_pooling.py
@@ -84,7 +84,7 @@ async def test_single_pooling(server: RemoteOpenAIServer, model_name: str):
 @pytest.mark.asyncio
 @pytest.mark.parametrize("model_name", [MODEL_NAME])
 async def test_batch_pooling(server: RemoteOpenAIServer, model_name: str):
-    # test List[str]
+    # test list[str]
    input_texts = [
        "The cat sat on the mat.", "A feline was resting on a rug.",
        "Stars twinkle brightly in the night sky."
@@ -107,7 +107,7 @@ async def test_batch_pooling(server: RemoteOpenAIServer, model_name: str):
    assert poolings.usage.prompt_tokens == 25
    assert poolings.usage.total_tokens == 25

-    # test List[List[int]]
+    # test list[list[int]]
    input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
                    [25, 32, 64, 77]]
    response = requests.post(

--- a/tests/entrypoints/openai/test_root_path.py
+++ b/tests/entrypoints/openai/test_root_path.py
@@ -2,7 +2,7 @@

 import contextlib
 import os
-from typing import Any, List, NamedTuple
+from typing import Any, NamedTuple

 import openai  # use the official client for correctness check
 import pytest
@@ -40,7 +40,7 @@ def server():

 class TestCase(NamedTuple):
    model_name: str
-    base_url: List[str]
+    base_url: list[str]
    api_key: str
    expected_error: Any


--- a/tests/entrypoints/openai/test_video.py
+++ b/tests/entrypoints/openai/test_video.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import Dict, List
-
 import openai
 import pytest
 import pytest_asyncio
@@ -49,7 +47,7 @@ async def client(server):


 @pytest.fixture(scope="session")
-def base64_encoded_video() -> Dict[str, str]:
+def base64_encoded_video() -> dict[str, str]:
    return {
        video_url: encode_video_base64(fetch_video(video_url))
        for video_url in TEST_VIDEO_URLS
@@ -151,7 +149,7 @@ async def test_single_chat_session_video_beamsearch(client: openai.AsyncOpenAI,
 @pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
 async def test_single_chat_session_video_base64encoded(
        client: openai.AsyncOpenAI, model_name: str, video_url: str,
-        base64_encoded_video: Dict[str, str]):
+        base64_encoded_video: dict[str, str]):

    messages = [{
        "role":
@@ -209,7 +207,7 @@ async def test_single_chat_session_video_base64encoded(
 @pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
 async def test_single_chat_session_video_base64encoded_beamsearch(
        client: openai.AsyncOpenAI, model_name: str, video_url: str,
-        base64_encoded_video: Dict[str, str]):
+        base64_encoded_video: dict[str, str]):

    messages = [{
        "role":
@@ -279,7 +277,7 @@ async def test_chat_streaming_video(client: openai.AsyncOpenAI,
        temperature=0.0,
        stream=True,
    )
-    chunks: List[str] = []
+    chunks: list[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        delta = chunk.choices[0].delta
@@ -302,7 +300,7 @@ async def test_chat_streaming_video(client: openai.AsyncOpenAI,
    "video_urls",
    [TEST_VIDEO_URLS[:i] for i in range(2, len(TEST_VIDEO_URLS))])
 async def test_multi_video_input(client: openai.AsyncOpenAI, model_name: str,
-                                 video_urls: List[str]):
+                                 video_urls: list[str]):

    messages = [{
        "role":

--- a/tests/entrypoints/openai/test_vision.py
+++ b/tests/entrypoints/openai/test_vision.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import Dict, List
-
 import openai
 import pytest
 import pytest_asyncio
@@ -50,7 +48,7 @@ async def client(server):


 @pytest.fixture(scope="session")
-def base64_encoded_image() -> Dict[str, str]:
+def base64_encoded_image() -> dict[str, str]:
    return {
        image_url: encode_image_base64(fetch_image(image_url))
        for image_url in TEST_IMAGE_URLS
@@ -152,7 +150,7 @@ async def test_single_chat_session_image_beamsearch(client: openai.AsyncOpenAI,
 @pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
 async def test_single_chat_session_image_base64encoded(
        client: openai.AsyncOpenAI, model_name: str, image_url: str,
-        base64_encoded_image: Dict[str, str]):
+        base64_encoded_image: dict[str, str]):

    messages = [{
        "role":
@@ -210,7 +208,7 @@ async def test_single_chat_session_image_base64encoded(
 @pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
 async def test_single_chat_session_image_base64encoded_beamsearch(
        client: openai.AsyncOpenAI, model_name: str, image_url: str,
-        base64_encoded_image: Dict[str, str]):
+        base64_encoded_image: dict[str, str]):

    messages = [{
        "role":
@@ -280,7 +278,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
        temperature=0.0,
        stream=True,
    )
-    chunks: List[str] = []
+    chunks: list[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        delta = chunk.choices[0].delta
@@ -303,7 +301,7 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
    "image_urls",
    [TEST_IMAGE_URLS[:i] for i in range(2, len(TEST_IMAGE_URLS))])
 async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
-                                 image_urls: List[str]):
+                                 image_urls: list[str]):

    messages = [{
        "role":

--- a/tests/entrypoints/openai/test_vision_embedding.py
+++ b/tests/entrypoints/openai/test_vision_embedding.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import Dict
-
 import pytest
 import requests

@@ -49,7 +47,7 @@ def server():


 @pytest.fixture(scope="session")
-def base64_encoded_image() -> Dict[str, str]:
+def base64_encoded_image() -> dict[str, str]:
    return {
        image_url: encode_image_base64(fetch_image(image_url))
        for image_url in TEST_IMAGE_URLS

--- a/tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
+++ b/tests/entrypoints/openai/tool_parsers/test_pythonic_tool_parser.py
 # SPDX-License-Identifier: Apache-2.0

-from typing import List
 from unittest.mock import MagicMock

 import pytest
@@ -125,7 +124,7 @@ TEST_CASES = [
 @pytest.mark.parametrize("streaming, model_output, expected_tool_calls",
                         TEST_CASES)
 def test_tool_call(streaming: bool, model_output: str,
-                   expected_tool_calls: List[FunctionCall]):
+                   expected_tool_calls: list[FunctionCall]):
    mock_tokenizer = MagicMock()
    tool_parser: ToolParser = ToolParserManager.get_tool_parser("pythonic")(
        mock_tokenizer)