Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
47532cd9
Unverified
Commit
47532cd9
authored
Mar 11, 2025
by
Joe Runde
Committed by
GitHub
Mar 12, 2025
Browse files
[core][V1] pluggable scheduler (#14466)
Signed-off-by:
Joe Runde
<
Joseph.Runde@ibm.com
>
parent
36e0c8f7
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
58 additions
and
11 deletions
+58
-11
tests/plugins_tests/test_scheduler_plugins.py
tests/plugins_tests/test_scheduler_plugins.py
+40
-9
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+5
-0
vllm/v1/engine/core.py
vllm/v1/engine/core.py
+13
-2
No files found.
tests/plugins_tests/test_scheduler_plugins.py
View file @
47532cd9
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
pytest
from
vllm.core.scheduler
import
Scheduler
from
vllm.core.scheduler
import
Scheduler
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.sampling_params
import
SamplingParams
from
vllm.v1.core.scheduler
import
Scheduler
as
V1Scheduler
from
vllm.v1.engine.llm_engine
import
LLMEngine
as
V1LLMEngine
class
DummyScheduler
(
Scheduler
):
class
Dummy
V0
Scheduler
(
Scheduler
):
def
schedule
(
self
):
def
schedule
(
self
):
raise
Exception
(
"Exception raised by DummyScheduler"
)
raise
Exception
(
"Exception raised by DummyV0Scheduler"
)
class
DummyV1Scheduler
(
V1Scheduler
):
def
test_scheduler_plugins
(
):
def
schedule
(
self
):
import
pytest
raise
Exception
(
"Exception raised by DummyV1Scheduler"
)
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.llm_engine
import
LLMEngine
from
vllm.sampling_params
import
SamplingParams
def
test_scheduler_plugins_v0
(
monkeypatch
):
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"0"
)
with
pytest
.
raises
(
Exception
)
as
exception_info
:
with
pytest
.
raises
(
Exception
)
as
exception_info
:
engine_args
=
EngineArgs
(
engine_args
=
EngineArgs
(
model
=
"facebook/opt-125m"
,
model
=
"facebook/opt-125m"
,
enforce_eager
=
True
,
# reduce test time
enforce_eager
=
True
,
# reduce test time
scheduler_cls
=
DummyScheduler
,
scheduler_cls
=
Dummy
V0
Scheduler
,
)
)
engine
=
LLMEngine
.
from_engine_args
(
engine_args
=
engine_args
)
engine
=
LLMEngine
.
from_engine_args
(
engine_args
=
engine_args
)
...
@@ -30,4 +38,27 @@ def test_scheduler_plugins():
...
@@ -30,4 +38,27 @@ def test_scheduler_plugins():
engine
.
add_request
(
"0"
,
"foo"
,
sampling_params
)
engine
.
add_request
(
"0"
,
"foo"
,
sampling_params
)
engine
.
step
()
engine
.
step
()
assert
str
(
exception_info
.
value
)
==
"Exception raised by DummyScheduler"
assert
str
(
exception_info
.
value
)
==
"Exception raised by DummyV0Scheduler"
def
test_scheduler_plugins_v1
(
monkeypatch
):
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
# Explicitly turn off engine multiprocessing so that the scheduler runs in
# this process
monkeypatch
.
setenv
(
"VLLM_ENABLE_V1_MULTIPROCESSING"
,
"0"
)
with
pytest
.
raises
(
Exception
)
as
exception_info
:
engine_args
=
EngineArgs
(
model
=
"facebook/opt-125m"
,
enforce_eager
=
True
,
# reduce test time
scheduler_cls
=
DummyV1Scheduler
,
)
engine
=
V1LLMEngine
.
from_engine_args
(
engine_args
=
engine_args
)
sampling_params
=
SamplingParams
(
max_tokens
=
1
)
engine
.
add_request
(
"0"
,
"foo"
,
sampling_params
)
engine
.
step
()
assert
str
(
exception_info
.
value
)
==
"Exception raised by DummyV1Scheduler"
vllm/engine/arg_utils.py
View file @
47532cd9
...
@@ -1437,6 +1437,11 @@ class EngineArgs:
...
@@ -1437,6 +1437,11 @@ class EngineArgs:
# V1 always uses chunked prefills.
# V1 always uses chunked prefills.
self
.
enable_chunked_prefill
=
True
self
.
enable_chunked_prefill
=
True
# V1 should use the new scheduler by default.
# Swap it only if this arg is set to the original V0 default
if
self
.
scheduler_cls
==
EngineArgs
.
scheduler_cls
:
self
.
scheduler_cls
=
"vllm.v1.core.scheduler.Scheduler"
# When no user override, set the default values based on the usage
# When no user override, set the default values based on the usage
# context.
# context.
# Use different default values for different hardware.
# Use different default values for different hardware.
...
...
vllm/v1/engine/core.py
View file @
47532cd9
...
@@ -19,9 +19,10 @@ from vllm.logger import init_logger
...
@@ -19,9 +19,10 @@ from vllm.logger import init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.transformers_utils.config
import
(
from
vllm.transformers_utils.config
import
(
maybe_register_config_serialize_by_value
)
maybe_register_config_serialize_by_value
)
from
vllm.utils
import
get_exception_traceback
,
zmq_socket_ctx
from
vllm.utils
import
(
get_exception_traceback
,
resolve_obj_by_qualname
,
zmq_socket_ctx
)
from
vllm.v1.core.kv_cache_utils
import
get_kv_cache_configs
from
vllm.v1.core.kv_cache_utils
import
get_kv_cache_configs
from
vllm.v1.core.scheduler
import
Scheduler
,
SchedulerOutput
from
vllm.v1.core.scheduler
import
SchedulerOutput
from
vllm.v1.engine
import
(
EngineCoreOutputs
,
EngineCoreRequest
,
from
vllm.v1.engine
import
(
EngineCoreOutputs
,
EngineCoreRequest
,
EngineCoreRequestType
,
UtilityOutput
)
EngineCoreRequestType
,
UtilityOutput
)
from
vllm.v1.engine.mm_input_cache
import
MMInputCacheServer
from
vllm.v1.engine.mm_input_cache
import
MMInputCacheServer
...
@@ -65,6 +66,16 @@ class EngineCore:
...
@@ -65,6 +66,16 @@ class EngineCore:
self
.
structured_output_manager
=
StructuredOutputManager
(
vllm_config
)
self
.
structured_output_manager
=
StructuredOutputManager
(
vllm_config
)
# Setup scheduler.
# Setup scheduler.
if
isinstance
(
vllm_config
.
scheduler_config
.
scheduler_cls
,
str
):
logger
.
warning
(
"Using configured V1 scheduler class %s. "
"This scheduler interface is not public and "
"compatibility may not be maintained."
,
vllm_config
.
scheduler_config
.
scheduler_cls
)
Scheduler
=
resolve_obj_by_qualname
(
vllm_config
.
scheduler_config
.
scheduler_cls
)
else
:
Scheduler
=
vllm_config
.
scheduler_config
.
scheduler_cls
self
.
scheduler
=
Scheduler
(
self
.
scheduler
=
Scheduler
(
scheduler_config
=
vllm_config
.
scheduler_config
,
scheduler_config
=
vllm_config
.
scheduler_config
,
model_config
=
vllm_config
.
model_config
,
model_config
=
vllm_config
.
model_config
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment