Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
dc7976dd
Unverified
Commit
dc7976dd
authored
Oct 09, 2025
by
Cyrus Leung
Committed by
GitHub
Oct 09, 2025
Browse files
[Misc] Upgrade more code to Python 3.10 (#26463)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
e4791438
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
16 additions
and
33 deletions
+16
-33
tests/entrypoints/openai/test_chat.py
tests/entrypoints/openai/test_chat.py
+1
-1
tests/entrypoints/test_context.py
tests/entrypoints/test_context.py
+3
-9
tests/utils_/test_utils.py
tests/utils_/test_utils.py
+1
-2
tests/v1/entrypoints/openai/test_completion.py
tests/v1/entrypoints/openai/test_completion.py
+2
-2
vllm/benchmarks/serve.py
vllm/benchmarks/serve.py
+6
-9
vllm/utils/__init__.py
vllm/utils/__init__.py
+2
-7
vllm/v1/serial_utils.py
vllm/v1/serial_utils.py
+1
-3
No files found.
tests/entrypoints/openai/test_chat.py
View file @
dc7976dd
...
...
@@ -369,7 +369,7 @@ async def test_chat_completion_stream_options(
assert
chunk
.
usage
is
None
else
:
assert
chunk
.
usage
is
None
final_chunk
=
await
stream
.
__anext__
(
)
final_chunk
=
await
anext
(
stream
)
assert
final_chunk
.
usage
is
not
None
assert
final_chunk
.
usage
.
prompt_tokens
>
0
assert
final_chunk
.
usage
.
completion_tokens
>
0
...
...
tests/entrypoints/test_context.py
View file @
dc7976dd
...
...
@@ -10,12 +10,6 @@ from vllm.entrypoints.context import HarmonyContext, StreamingHarmonyContext
from
vllm.outputs
import
CompletionOutput
,
RequestOutput
# Helper function for Python < 3.10 compatibility
async
def
async_next
(
async_iterator
):
"""Compatibility function equivalent to Python 3.10's anext()."""
return
await
async_iterator
.
__anext__
()
def
create_mock_request_output
(
prompt_token_ids
=
None
,
output_token_ids
=
None
,
...
...
@@ -129,7 +123,7 @@ async def test_multi_turn_token_counting():
)
# First turn - initial prompt and response
mock_output1
=
await
a
sync_
next
(
mock_generator
)
mock_output1
=
await
anext
(
mock_generator
)
context
.
append_output
(
mock_output1
)
# At this point, we should have 5 prompt tokens and 3 output tokens
...
...
@@ -138,7 +132,7 @@ async def test_multi_turn_token_counting():
assert
context
.
num_tool_output_tokens
==
0
# Second turn - after tool output
mock_output2
=
await
a
sync_
next
(
mock_generator
)
mock_output2
=
await
anext
(
mock_generator
)
context
.
append_output
(
mock_output2
)
# Current prompt tokens (15) - last_turn_input_tokens (5) -
# last_turn_output_tokens (3) = 7
...
...
@@ -150,7 +144,7 @@ async def test_multi_turn_token_counting():
assert
context
.
num_cached_tokens
==
5
# Third turn - final response
mock_output3
=
await
a
sync_
next
(
mock_generator
)
mock_output3
=
await
anext
(
mock_generator
)
context
.
append_output
(
mock_output3
)
# Additional tool output tokens from third turn:
# Current prompt (20) - last_turn_input_tokens (15) -
...
...
tests/utils_/test_utils.py
View file @
dc7976dd
...
...
@@ -75,8 +75,7 @@ async def test_merge_async_iterators():
for
iterator
in
iterators
:
try
:
# Can use anext() in python >= 3.10
await
asyncio
.
wait_for
(
iterator
.
__anext__
(),
1
)
await
asyncio
.
wait_for
(
anext
(
iterator
),
1
)
except
StopAsyncIteration
:
# All iterators should be cancelled and print this message.
print
(
"Iterator was cancelled normally"
)
...
...
tests/v1/entrypoints/openai/test_completion.py
View file @
dc7976dd
...
...
@@ -420,7 +420,7 @@ async def test_completion_stream_options(client: openai.AsyncOpenAI, model_name:
assert
chunk
.
usage
is
None
else
:
assert
chunk
.
usage
is
None
final_chunk
=
await
stream
.
__anext__
(
)
final_chunk
=
await
anext
(
stream
)
assert
final_chunk
.
usage
is
not
None
assert
final_chunk
.
usage
.
prompt_tokens
>
0
assert
final_chunk
.
usage
.
completion_tokens
>
0
...
...
@@ -450,7 +450,7 @@ async def test_completion_stream_options(client: openai.AsyncOpenAI, model_name:
chunk
.
usage
.
prompt_tokens
+
chunk
.
usage
.
completion_tokens
)
if
chunk
.
choices
[
0
].
finish_reason
is
not
None
:
final_chunk
=
await
stream
.
__anext__
(
)
final_chunk
=
await
anext
(
stream
)
assert
final_chunk
.
usage
is
not
None
assert
final_chunk
.
usage
.
prompt_tokens
>
0
assert
final_chunk
.
usage
.
completion_tokens
>
0
...
...
vllm/benchmarks/serve.py
View file @
dc7976dd
...
...
@@ -18,6 +18,7 @@ On the client side, run:
import
argparse
import
asyncio
import
contextlib
import
gc
import
importlib.util
import
json
...
...
@@ -605,17 +606,13 @@ async def benchmark(
pbar
=
None
if
disable_tqdm
else
tqdm
(
total
=
len
(
input_requests
))
# This can be used once the minimum Python version is 3.10 or higher,
# and it will simplify the code in limited_request_func.
#
semaphore = (asyncio.Semaphore(
max_concurrency
)
#
if max_concurrency
else contextlib.nullcontext()
)
semaphore
=
asyncio
.
Semaphore
(
max_concurrency
)
if
max_concurrency
else
None
semaphore
=
(
asyncio
.
Semaphore
(
max_concurrency
)
if
max_concurrency
else
contextlib
.
nullcontext
()
)
async
def
limited_request_func
(
request_func_input
,
session
,
pbar
):
if
semaphore
is
None
:
return
await
request_func
(
request_func_input
=
request_func_input
,
session
=
session
,
pbar
=
pbar
)
async
with
semaphore
:
return
await
request_func
(
request_func_input
=
request_func_input
,
session
=
session
,
pbar
=
pbar
...
...
vllm/utils/__init__.py
View file @
dc7976dd
...
...
@@ -469,11 +469,6 @@ def make_async(
return
_async_wrapper
def
_next_task
(
iterator
:
AsyncGenerator
[
T
,
None
],
loop
:
AbstractEventLoop
)
->
Task
:
# Can use anext() in python >= 3.10
return
loop
.
create_task
(
iterator
.
__anext__
())
# type: ignore[arg-type]
async
def
merge_async_iterators
(
*
iterators
:
AsyncGenerator
[
T
,
None
],
)
->
AsyncGenerator
[
tuple
[
int
,
T
],
None
]:
...
...
@@ -491,7 +486,7 @@ async def merge_async_iterators(
loop
=
asyncio
.
get_running_loop
()
awaits
=
{
_next_task
(
pair
[
1
],
loop
):
pair
for
pair
in
enumerate
(
iterators
)}
awaits
=
{
loop
.
create_task
(
anext
(
it
)):
(
i
,
it
)
for
i
,
it
in
enumerate
(
iterators
)}
try
:
while
awaits
:
done
,
_
=
await
asyncio
.
wait
(
awaits
.
keys
(),
return_when
=
FIRST_COMPLETED
)
...
...
@@ -500,7 +495,7 @@ async def merge_async_iterators(
try
:
item
=
await
d
i
,
it
=
pair
awaits
[
_next_task
(
it
,
loop
)]
=
pair
awaits
[
loop
.
create_task
(
anext
(
it
)
)]
=
pair
yield
i
,
item
except
StopAsyncIteration
:
pass
...
...
vllm/v1/serial_utils.py
View file @
dc7976dd
...
...
@@ -290,9 +290,7 @@ class MsgpackDecoder:
_log_insecure_serialization_warning
()
def
decode
(
self
,
bufs
:
Union
[
bytestr
,
Sequence
[
bytestr
]])
->
Any
:
if
isinstance
(
bufs
,
(
bytes
,
bytearray
,
memoryview
,
zmq
.
Frame
)):
# TODO - This check can become `isinstance(bufs, bytestr)`
# as of Python 3.10.
if
isinstance
(
bufs
,
bytestr
):
# type: ignore
return
self
.
decoder
.
decode
(
bufs
)
self
.
aux_buffers
=
bufs
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment