Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c5830381
Unverified
Commit
c5830381
authored
Aug 12, 2025
by
Woosuk Kwon
Committed by
GitHub
Aug 12, 2025
Browse files
[V0 Deprecation] Remove args for multi-step scheduling (#22779)
Signed-off-by:
Woosuk Kwon
<
woosuk@thinkingmachines.ai
>
parent
d31f97cf
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1 addition
and
27 deletions
+1
-27
tests/utils_/test_utils.py
tests/utils_/test_utils.py
+0
-1
vllm/config/scheduler.py
vllm/config/scheduler.py
+1
-26
No files found.
tests/utils_/test_utils.py
View file @
c5830381
...
...
@@ -161,7 +161,6 @@ def parser_with_config():
parser
.
add_argument
(
'--port'
,
type
=
int
)
parser
.
add_argument
(
'--tensor-parallel-size'
,
type
=
int
)
parser
.
add_argument
(
'--trust-remote-code'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--multi-step-stream-outputs'
,
action
=
StoreBoolean
)
return
parser
...
...
vllm/config/scheduler.py
View file @
c5830381
...
...
@@ -115,12 +115,6 @@ class SchedulerConfig:
(e.g., beam search), recomputation is not currently supported. In
such a case, we use swapping instead."""
num_scheduler_steps
:
int
=
1
"""Maximum number of forward steps per scheduler call."""
multi_step_stream_outputs
:
bool
=
True
"""If False, then multi-step will stream outputs at the end of all steps"""
send_delta_data
:
bool
=
False
"""Private API. If used, scheduler sends delta data to
workers instead of an entire data. It should be enabled only
...
...
@@ -193,16 +187,7 @@ class SchedulerConfig:
if
self
.
max_num_batched_tokens
is
None
:
if
self
.
enable_chunked_prefill
:
if
self
.
num_scheduler_steps
>
1
:
# Multi-step Chunked-Prefill doesn't allow prompt-chunking
# for now. Have max_num_batched_tokens set to max_model_len
# so we don't reject sequences on account of a short
# max_num_batched_tokens.
self
.
max_num_batched_tokens
=
max
(
self
.
max_model_len
,
DEFAULT_MAX_NUM_BATCHED_TOKENS
)
else
:
self
.
max_num_batched_tokens
=
(
DEFAULT_MAX_NUM_BATCHED_TOKENS
)
self
.
max_num_batched_tokens
=
DEFAULT_MAX_NUM_BATCHED_TOKENS
else
:
# If max_model_len is too short, use
# DEFAULT_MAX_NUM_BATCHED_TOKENS as the default value
...
...
@@ -293,12 +278,6 @@ class SchedulerConfig:
f
"(
{
self
.
num_lookahead_slots
}
) must be greater than or "
"equal to 0."
)
if
self
.
num_scheduler_steps
<
1
:
raise
ValueError
(
"num_scheduler_steps "
f
"(
{
self
.
num_scheduler_steps
}
) must be greater than or "
"equal to 1."
)
if
self
.
max_num_partial_prefills
<
1
:
raise
ValueError
(
f
"max_num_partial_prefills (
{
self
.
max_num_partial_prefills
}
) "
...
...
@@ -323,7 +302,3 @@ class SchedulerConfig:
f
"max_num_partial_prefills (
{
self
.
max_num_partial_prefills
}
)."
)
return
self
@
property
def
is_multi_step
(
self
)
->
bool
:
return
self
.
num_scheduler_steps
>
1
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment