Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
c5830381
Unverified
Commit
c5830381
authored
Aug 12, 2025
by
Woosuk Kwon
Committed by
GitHub
Aug 12, 2025
Browse files
[V0 Deprecation] Remove args for multi-step scheduling (#22779)
Signed-off-by:
Woosuk Kwon
<
woosuk@thinkingmachines.ai
>
parent
d31f97cf
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1 addition
and
27 deletions
+1
-27
tests/utils_/test_utils.py
tests/utils_/test_utils.py
+0
-1
vllm/config/scheduler.py
vllm/config/scheduler.py
+1
-26
No files found.
tests/utils_/test_utils.py
View file @
c5830381
...
@@ -161,7 +161,6 @@ def parser_with_config():
...
@@ -161,7 +161,6 @@ def parser_with_config():
parser
.
add_argument
(
'--port'
,
type
=
int
)
parser
.
add_argument
(
'--port'
,
type
=
int
)
parser
.
add_argument
(
'--tensor-parallel-size'
,
type
=
int
)
parser
.
add_argument
(
'--tensor-parallel-size'
,
type
=
int
)
parser
.
add_argument
(
'--trust-remote-code'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--trust-remote-code'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--multi-step-stream-outputs'
,
action
=
StoreBoolean
)
return
parser
return
parser
...
...
vllm/config/scheduler.py
View file @
c5830381
...
@@ -115,12 +115,6 @@ class SchedulerConfig:
...
@@ -115,12 +115,6 @@ class SchedulerConfig:
(e.g., beam search), recomputation is not currently supported. In
(e.g., beam search), recomputation is not currently supported. In
such a case, we use swapping instead."""
such a case, we use swapping instead."""
num_scheduler_steps
:
int
=
1
"""Maximum number of forward steps per scheduler call."""
multi_step_stream_outputs
:
bool
=
True
"""If False, then multi-step will stream outputs at the end of all steps"""
send_delta_data
:
bool
=
False
send_delta_data
:
bool
=
False
"""Private API. If used, scheduler sends delta data to
"""Private API. If used, scheduler sends delta data to
workers instead of an entire data. It should be enabled only
workers instead of an entire data. It should be enabled only
...
@@ -193,16 +187,7 @@ class SchedulerConfig:
...
@@ -193,16 +187,7 @@ class SchedulerConfig:
if
self
.
max_num_batched_tokens
is
None
:
if
self
.
max_num_batched_tokens
is
None
:
if
self
.
enable_chunked_prefill
:
if
self
.
enable_chunked_prefill
:
if
self
.
num_scheduler_steps
>
1
:
self
.
max_num_batched_tokens
=
DEFAULT_MAX_NUM_BATCHED_TOKENS
# Multi-step Chunked-Prefill doesn't allow prompt-chunking
# for now. Have max_num_batched_tokens set to max_model_len
# so we don't reject sequences on account of a short
# max_num_batched_tokens.
self
.
max_num_batched_tokens
=
max
(
self
.
max_model_len
,
DEFAULT_MAX_NUM_BATCHED_TOKENS
)
else
:
self
.
max_num_batched_tokens
=
(
DEFAULT_MAX_NUM_BATCHED_TOKENS
)
else
:
else
:
# If max_model_len is too short, use
# If max_model_len is too short, use
# DEFAULT_MAX_NUM_BATCHED_TOKENS as the default value
# DEFAULT_MAX_NUM_BATCHED_TOKENS as the default value
...
@@ -293,12 +278,6 @@ class SchedulerConfig:
...
@@ -293,12 +278,6 @@ class SchedulerConfig:
f
"(
{
self
.
num_lookahead_slots
}
) must be greater than or "
f
"(
{
self
.
num_lookahead_slots
}
) must be greater than or "
"equal to 0."
)
"equal to 0."
)
if
self
.
num_scheduler_steps
<
1
:
raise
ValueError
(
"num_scheduler_steps "
f
"(
{
self
.
num_scheduler_steps
}
) must be greater than or "
"equal to 1."
)
if
self
.
max_num_partial_prefills
<
1
:
if
self
.
max_num_partial_prefills
<
1
:
raise
ValueError
(
raise
ValueError
(
f
"max_num_partial_prefills (
{
self
.
max_num_partial_prefills
}
) "
f
"max_num_partial_prefills (
{
self
.
max_num_partial_prefills
}
) "
...
@@ -323,7 +302,3 @@ class SchedulerConfig:
...
@@ -323,7 +302,3 @@ class SchedulerConfig:
f
"max_num_partial_prefills (
{
self
.
max_num_partial_prefills
}
)."
)
f
"max_num_partial_prefills (
{
self
.
max_num_partial_prefills
}
)."
)
return
self
return
self
@
property
def
is_multi_step
(
self
)
->
bool
:
return
self
.
num_scheduler_steps
>
1
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment