Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
efbc116a
"examples/vscode:/vscode.git/clone" did not exist on "993cb9fad66da63e9abd3a69bc857e5e274457ab"
Unverified
Commit
efbc116a
authored
Nov 03, 2024
by
Lianmin Zheng
Committed by
GitHub
Nov 03, 2024
Browse files
Do not use longest prefix matching when #queue-req is large (#1896)
parent
6aed0445
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
13 additions
and
7 deletions
+13
-7
python/sglang/srt/managers/schedule_policy.py
python/sglang/srt/managers/schedule_policy.py
+13
-7
No files found.
python/sglang/srt/managers/schedule_policy.py
View file @
efbc116a
...
...
@@ -45,9 +45,15 @@ class SchedulePolicy:
self
.
tree_cache
=
tree_cache
def
calc_priority
(
self
,
waiting_queue
:
List
[
Req
]):
if
len
(
waiting_queue
)
>
128
and
self
.
policy
==
"lpm"
:
# Turn off the expensive prefix matching and sorting when the #queue is large.
policy
=
"fcfs"
else
:
policy
=
self
.
policy
# Compute matched prefix length
prefix_computed
=
False
if
self
.
policy
==
"lpm"
or
self
.
policy
==
"dfs-weight"
:
if
policy
==
"lpm"
or
policy
==
"dfs-weight"
:
for
r
in
waiting_queue
:
# NOTE: the prefix_indices must always be aligned with last_node
r
.
prefix_indices
,
r
.
last_node
=
self
.
tree_cache
.
match_prefix
(
...
...
@@ -56,18 +62,18 @@ class SchedulePolicy:
prefix_computed
=
True
if
self
.
policy
==
"lpm"
:
if
policy
==
"lpm"
:
# Longest Prefix Match
waiting_queue
.
sort
(
key
=
lambda
x
:
-
len
(
x
.
prefix_indices
))
elif
self
.
policy
==
"fcfs"
:
elif
policy
==
"fcfs"
:
# first come first serve
pass
elif
self
.
policy
==
"lof"
:
elif
policy
==
"lof"
:
# longest output first
waiting_queue
.
sort
(
key
=
lambda
x
:
-
x
.
sampling_params
.
max_new_tokens
)
elif
self
.
policy
==
"random"
:
elif
policy
==
"random"
:
random
.
shuffle
(
waiting_queue
)
elif
self
.
policy
==
"dfs-weight"
:
elif
policy
==
"dfs-weight"
:
last_node_to_reqs
=
defaultdict
(
list
)
for
req
in
waiting_queue
:
last_node_to_reqs
[
req
.
last_node
].
append
(
req
)
...
...
@@ -85,7 +91,7 @@ class SchedulePolicy:
waiting_queue
,
)
else
:
raise
ValueError
(
f
"Unknown schedule_policy:
{
self
.
policy
}
"
)
raise
ValueError
(
f
"Unknown schedule_policy:
{
policy
=
}
"
)
return
prefix_computed
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment