[Misc] Upgrade more code to Python 3.10 (#26463)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Misc] Upgrade more code to Python 3.10 (#26463)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
dc7976dd · Cyrus Leung · GitHub · e4791438 · dc7976dd · dc7976dd
Unverified Commit dc7976dd authored Oct 09, 2025 by Cyrus Leung Committed by GitHub Oct 09, 2025
7 changed files
--- a/tests/entrypoints/openai/test_chat.py
+++ b/tests/entrypoints/openai/test_chat.py
@@ -369,7 +369,7 @@ async def test_chat_completion_stream_options(
            assert chunk.usage is None
        else:
            assert chunk.usage is None
-            final_chunk = await stream.__anext__()
+            final_chunk = await anext(stream)
            assert final_chunk.usage is not None
            assert final_chunk.usage.prompt_tokens > 0
            assert final_chunk.usage.completion_tokens > 0

--- a/tests/entrypoints/test_context.py
+++ b/tests/entrypoints/test_context.py
@@ -10,12 +10,6 @@ from vllm.entrypoints.context import HarmonyContext, StreamingHarmonyContext
 from vllm.outputs import CompletionOutput, RequestOutput


-# Helper function for Python < 3.10 compatibility
-async def async_next(async_iterator):
-    """Compatibility function equivalent to Python 3.10's anext()."""
-    return await async_iterator.__anext__()
-
-
 def create_mock_request_output(
    prompt_token_ids=None,
    output_token_ids=None,
@@ -129,7 +123,7 @@ async def test_multi_turn_token_counting():
    )

    # First turn - initial prompt and response
-    mock_output1 = await async_next(mock_generator)
+    mock_output1 = await anext(mock_generator)
    context.append_output(mock_output1)

    # At this point, we should have 5 prompt tokens and 3 output tokens
@@ -138,7 +132,7 @@ async def test_multi_turn_token_counting():
    assert context.num_tool_output_tokens == 0

    # Second turn - after tool output
-    mock_output2 = await async_next(mock_generator)
+    mock_output2 = await anext(mock_generator)
    context.append_output(mock_output2)
    # Current prompt tokens (15) - last_turn_input_tokens (5) -
    # last_turn_output_tokens (3) = 7
@@ -150,7 +144,7 @@ async def test_multi_turn_token_counting():
    assert context.num_cached_tokens == 5

    # Third turn - final response
-    mock_output3 = await async_next(mock_generator)
+    mock_output3 = await anext(mock_generator)
    context.append_output(mock_output3)
    # Additional tool output tokens from third turn:
    # Current prompt (20) - last_turn_input_tokens (15) -

--- a/tests/utils_/test_utils.py
+++ b/tests/utils_/test_utils.py
@@ -75,8 +75,7 @@ async def test_merge_async_iterators():

    for iterator in iterators:
        try:
-            # Can use anext() in python >= 3.10
-            await asyncio.wait_for(iterator.__anext__(), 1)
+            await asyncio.wait_for(anext(iterator), 1)
        except StopAsyncIteration:
            # All iterators should be cancelled and print this message.
            print("Iterator was cancelled normally")

--- a/tests/v1/entrypoints/openai/test_completion.py
+++ b/tests/v1/entrypoints/openai/test_completion.py
@@ -420,7 +420,7 @@ async def test_completion_stream_options(client: openai.AsyncOpenAI, model_name:
            assert chunk.usage is None
        else:
            assert chunk.usage is None
-            final_chunk = await stream.__anext__()
+            final_chunk = await anext(stream)
            assert final_chunk.usage is not None
            assert final_chunk.usage.prompt_tokens > 0
            assert final_chunk.usage.completion_tokens > 0
@@ -450,7 +450,7 @@ async def test_completion_stream_options(client: openai.AsyncOpenAI, model_name:
            chunk.usage.prompt_tokens + chunk.usage.completion_tokens
        )
        if chunk.choices[0].finish_reason is not None:
-            final_chunk = await stream.__anext__()
+            final_chunk = await anext(stream)
            assert final_chunk.usage is not None
            assert final_chunk.usage.prompt_tokens > 0
            assert final_chunk.usage.completion_tokens > 0

--- a/vllm/benchmarks/serve.py
+++ b/vllm/benchmarks/serve.py
@@ -18,6 +18,7 @@ On the client side, run:

 import argparse
 import asyncio
+import contextlib
 import gc
 import importlib.util
 import json
@@ -605,17 +606,13 @@ async def benchmark(

    pbar = None if disable_tqdm else tqdm(total=len(input_requests))

-    # This can be used once the minimum Python version is 3.10 or higher,
-    # and it will simplify the code in limited_request_func.
-    #    semaphore = (asyncio.Semaphore(max_concurrency)
-    #                 if max_concurrency else contextlib.nullcontext())
-    semaphore = asyncio.Semaphore(max_concurrency) if max_concurrency else None
+    semaphore = (
+        asyncio.Semaphore(max_concurrency)
+        if max_concurrency
+        else contextlib.nullcontext()
+    )

    async def limited_request_func(request_func_input, session, pbar):
-        if semaphore is None:
-            return await request_func(
-                request_func_input=request_func_input, session=session, pbar=pbar
-            )
        async with semaphore:
            return await request_func(
                request_func_input=request_func_input, session=session, pbar=pbar

--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -469,11 +469,6 @@ def make_async(
    return _async_wrapper


-def _next_task(iterator: AsyncGenerator[T, None], loop: AbstractEventLoop) -> Task:
-    # Can use anext() in python >= 3.10
-    return loop.create_task(iterator.__anext__())  # type: ignore[arg-type]
-
-
 async def merge_async_iterators(
    *iterators: AsyncGenerator[T, None],
 ) -> AsyncGenerator[tuple[int, T], None]:
@@ -491,7 +486,7 @@ async def merge_async_iterators(

    loop = asyncio.get_running_loop()

-    awaits = {_next_task(pair[1], loop): pair for pair in enumerate(iterators)}
+    awaits = {loop.create_task(anext(it)): (i, it) for i, it in enumerate(iterators)}
    try:
        while awaits:
            done, _ = await asyncio.wait(awaits.keys(), return_when=FIRST_COMPLETED)
@@ -500,7 +495,7 @@ async def merge_async_iterators(
                try:
                    item = await d
                    i, it = pair
-                    awaits[_next_task(it, loop)] = pair
+                    awaits[loop.create_task(anext(it))] = pair
                    yield i, item
                except StopAsyncIteration:
                    pass

--- a/vllm/v1/serial_utils.py
+++ b/vllm/v1/serial_utils.py
@@ -290,9 +290,7 @@ class MsgpackDecoder:
            _log_insecure_serialization_warning()

    def decode(self, bufs: Union[bytestr, Sequence[bytestr]]) -> Any:
-        if isinstance(bufs, (bytes, bytearray, memoryview, zmq.Frame)):
-            # TODO - This check can become `isinstance(bufs, bytestr)`
-            # as of Python 3.10.
+        if isinstance(bufs, bytestr):  # type: ignore
            return self.decoder.decode(bufs)

        self.aux_buffers = bufs