Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
28ae32a5
Unverified
Commit
28ae32a5
authored
Jan 09, 2026
by
Wentao Ye
Committed by
GitHub
Jan 09, 2026
Browse files
[Refactor] Remove numpy split in async scheduling (#32034)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
f32c629e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
12 deletions
+6
-12
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+6
-12
No files found.
vllm/v1/engine/async_llm.py
View file @
28ae32a5
...
@@ -9,7 +9,6 @@ from collections.abc import AsyncGenerator, Iterable, Mapping
...
@@ -9,7 +9,6 @@ from collections.abc import AsyncGenerator, Iterable, Mapping
from
copy
import
copy
from
copy
import
copy
from
typing
import
Any
,
cast
from
typing
import
Any
,
cast
import
numpy
as
np
import
torch
import
torch
import
vllm.envs
as
envs
import
vllm.envs
as
envs
...
@@ -32,7 +31,6 @@ from vllm.transformers_utils.config import maybe_register_config_serialize_by_va
...
@@ -32,7 +31,6 @@ from vllm.transformers_utils.config import maybe_register_config_serialize_by_va
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils.async_utils
import
cancel_task_threadsafe
from
vllm.utils.async_utils
import
cancel_task_threadsafe
from
vllm.utils.collection_utils
import
as_list
from
vllm.utils.collection_utils
import
as_list
from
vllm.utils.math_utils
import
cdiv
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine.core_client
import
EngineCoreClient
from
vllm.v1.engine.core_client
import
EngineCoreClient
from
vllm.v1.engine.exceptions
import
EngineDeadError
,
EngineGenerateError
from
vllm.v1.engine.exceptions
import
EngineDeadError
,
EngineGenerateError
...
@@ -495,6 +493,7 @@ class AsyncLLM(EngineClient):
...
@@ -495,6 +493,7 @@ class AsyncLLM(EngineClient):
log_stats
=
self
.
log_stats
log_stats
=
self
.
log_stats
logger_manager
=
self
.
logger_manager
logger_manager
=
self
.
logger_manager
input_processor
=
self
.
input_processor
input_processor
=
self
.
input_processor
chunk_size
=
envs
.
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
async
def
output_handler
():
async
def
output_handler
():
try
:
try
:
...
@@ -510,15 +509,10 @@ class AsyncLLM(EngineClient):
...
@@ -510,15 +509,10 @@ class AsyncLLM(EngineClient):
# Split outputs into chunks of at most
# Split outputs into chunks of at most
# VLLM_V1_OUTPUT_PROC_CHUNK_SIZE, so that we don't block the
# VLLM_V1_OUTPUT_PROC_CHUNK_SIZE, so that we don't block the
# event loop for too long.
# event loop for too long.
if
num_outputs
<=
envs
.
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
:
engine_core_outputs
=
outputs
.
outputs
slices
=
(
outputs
.
outputs
,)
for
start
in
range
(
0
,
num_outputs
,
chunk_size
):
else
:
end
=
start
+
chunk_size
slices
=
np
.
array_split
(
outputs_slice
=
engine_core_outputs
[
start
:
end
]
outputs
.
outputs
,
cdiv
(
num_outputs
,
envs
.
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE
),
)
for
i
,
outputs_slice
in
enumerate
(
slices
):
# 2) Process EngineCoreOutputs.
# 2) Process EngineCoreOutputs.
processed_outputs
=
output_processor
.
process_outputs
(
processed_outputs
=
output_processor
.
process_outputs
(
outputs_slice
,
outputs
.
timestamp
,
iteration_stats
outputs_slice
,
outputs
.
timestamp
,
iteration_stats
...
@@ -527,7 +521,7 @@ class AsyncLLM(EngineClient):
...
@@ -527,7 +521,7 @@ class AsyncLLM(EngineClient):
assert
not
processed_outputs
.
request_outputs
assert
not
processed_outputs
.
request_outputs
# Allow other asyncio tasks to run between chunks
# Allow other asyncio tasks to run between chunks
if
i
+
1
<
len
(
slices
)
:
if
end
<
num_outputs
:
await
asyncio
.
sleep
(
0
)
await
asyncio
.
sleep
(
0
)
# 3) Abort any reqs that finished due to stop strings.
# 3) Abort any reqs that finished due to stop strings.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment