"server/text_generation_server/models/flash_gemma2.py" did not exist on "85aa7e2e7b02608eea04206b6cc0fa0ccced80ef"
Unverified Commit 43ad0590 authored by Lianmin Zheng, committed by GitHub
Browse files

[Auto Sync] Update scheduler.py, server_args.py (20251020) (#11875)


Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Kan Wu <wukanustc@gmail.com>
parent b4948512
......@@ -306,6 +306,9 @@ class Scheduler(
self.dp_size = server_args.dp_size
self.schedule_policy = server_args.schedule_policy
self.enable_priority_scheduling = server_args.enable_priority_scheduling
self.abort_on_priority_when_disabled = (
server_args.abort_on_priority_when_disabled
)
self.schedule_low_priority_values_first = (
server_args.schedule_low_priority_values_first
)
......@@ -1560,7 +1563,11 @@ class Scheduler(
req.priority = sys.maxsize
else:
req.priority = -sys.maxsize - 1
elif not self.enable_priority_scheduling and req.priority is not None:
elif (
not self.enable_priority_scheduling
and req.priority is not None
and self.abort_on_priority_when_disabled
):
abort_req = AbortReq(
finished_reason={
"type": "abort",
......
......@@ -220,6 +220,7 @@ class ServerArgs:
max_prefill_tokens: int = 16384
schedule_policy: str = "fcfs"
enable_priority_scheduling: bool = False
abort_on_priority_when_disabled: bool = False
schedule_low_priority_values_first: bool = False
priority_scheduling_preemption_threshold: int = 10
schedule_conservativeness: float = 1.0
......@@ -1771,6 +1772,12 @@ class ServerArgs:
default=ServerArgs.enable_priority_scheduling,
help="Enable priority scheduling. Requests with higher priority integer values will be scheduled first by default.",
)
parser.add_argument(
"--abort-on-priority-when-disabled",
action="store_true",
default=ServerArgs.abort_on_priority_when_disabled,
help="If set, abort requests that specify a priority when priority scheduling is disabled.",
)
parser.add_argument(
"--schedule-low-priority-values-first",
action="store_true",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment