Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
be76e5aa
"vscode:/vscode.git/clone" did not exist on "416f05929ac66f5ae364936b70087fc60cacee4b"
Unverified
Commit
be76e5aa
authored
Sep 30, 2024
by
Sebastian Schoennenbeck
Committed by
GitHub
Sep 30, 2024
Browse files
[Core] Make scheduling policy settable via EngineArgs (#8956)
parent
2ae25f79
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
2 deletions
+14
-2
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+14
-2
No files found.
vllm/engine/arg_utils.py
View file @
be76e5aa
...
...
@@ -2,8 +2,8 @@ import argparse
import
dataclasses
import
json
from
dataclasses
import
dataclass
from
typing
import
(
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Mapping
,
Optional
,
Tuple
,
Type
,
Union
)
from
typing
import
(
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Literal
,
Mapping
,
Optional
,
Tuple
,
Type
,
Union
)
import
torch
...
...
@@ -177,6 +177,7 @@ class EngineArgs:
disable_async_output_proc
:
bool
=
False
override_neuron_config
:
Optional
[
Dict
[
str
,
Any
]]
=
None
mm_processor_kwargs
:
Optional
[
Dict
[
str
,
Any
]]
=
None
scheduling_policy
:
Literal
[
"fcfs"
,
"priority"
]
=
"fcfs"
def
__post_init__
(
self
):
if
self
.
tokenizer
is
None
:
...
...
@@ -797,6 +798,16 @@ class EngineArgs:
default
=
None
,
help
=
"override or set neuron device configuration."
)
parser
.
add_argument
(
'--scheduling-policy'
,
choices
=
[
'fcfs'
,
'priority'
],
default
=
"fcfs"
,
help
=
'The scheduling policy to use. "fcfs" (first come first served'
', i.e. requests are handled in order of arrival; default) '
'or "priority" (requests are handled based on given '
'priority (lower value means earlier handling) and time of '
'arrival deciding any ties).'
)
return
parser
@
classmethod
...
...
@@ -1011,6 +1022,7 @@ class EngineArgs:
multi_step_stream_outputs
=
self
.
multi_step_stream_outputs
,
send_delta_data
=
(
envs
.
VLLM_USE_RAY_SPMD_WORKER
and
parallel_config
.
use_ray
),
policy
=
self
.
scheduling_policy
,
)
lora_config
=
LoRAConfig
(
max_lora_rank
=
self
.
max_lora_rank
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment