Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
10771026
Unverified
Commit
10771026
authored
Feb 26, 2025
by
IAN
Committed by
GitHub
Feb 25, 2025
Browse files
[BugFix] Fix crash when receiving a request with structured output in DP attention mode. (#3841)
parent
4606e2a3
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
10 deletions
+33
-10
benchmark/json_decode_regex/bench_sglang.py
benchmark/json_decode_regex/bench_sglang.py
+1
-0
python/sglang/srt/managers/scheduler.py
python/sglang/srt/managers/scheduler.py
+32
-10
No files found.
benchmark/json_decode_regex/bench_sglang.py
View file @
10771026
...
@@ -46,6 +46,7 @@ def json_decode(s, document):
...
@@ -46,6 +46,7 @@ def json_decode(s, document):
def
main
(
args
):
def
main
(
args
):
lines
=
read_jsonl
(
args
.
data_path
)
lines
=
read_jsonl
(
args
.
data_path
)
lines
=
list
(
lines
)
arguments
=
[]
arguments
=
[]
for
i
in
range
(
len
(
lines
[:
args
.
num_questions
])):
for
i
in
range
(
len
(
lines
[:
args
.
num_questions
])):
arguments
.
append
(
arguments
.
append
(
...
...
python/sglang/srt/managers/scheduler.py
View file @
10771026
...
@@ -1154,6 +1154,10 @@ class Scheduler:
...
@@ -1154,6 +1154,10 @@ class Scheduler:
elif
batch
.
forward_mode
.
is_idle
():
elif
batch
.
forward_mode
.
is_idle
():
if
self
.
enable_overlap
:
if
self
.
enable_overlap
:
self
.
tp_worker
.
resolve_batch_result
(
result
.
bid
)
self
.
tp_worker
.
resolve_batch_result
(
result
.
bid
)
if
batch
.
next_batch_sampling_info
:
batch
.
next_batch_sampling_info
.
update_regex_vocab_mask
()
self
.
current_stream
.
synchronize
()
batch
.
next_batch_sampling_info
.
sampling_info_done
.
set
()
elif
batch
.
forward_mode
.
is_dummy_first
():
elif
batch
.
forward_mode
.
is_dummy_first
():
batch
.
next_batch_sampling_info
.
update_regex_vocab_mask
()
batch
.
next_batch_sampling_info
.
update_regex_vocab_mask
()
self
.
current_stream
.
synchronize
()
self
.
current_stream
.
synchronize
()
...
@@ -1630,16 +1634,34 @@ class Scheduler:
...
@@ -1630,16 +1634,34 @@ class Scheduler:
except
futures
.
_base
.
TimeoutError
:
except
futures
.
_base
.
TimeoutError
:
break
break
if
self
.
tp_size
>
1
:
if
self
.
server_args
.
enable_dp_attention
:
# Sync across TP ranks to make sure they have the same number of ready requests
if
self
.
attn_tp_size
>
1
:
tensor
=
torch
.
tensor
(
num_ready_reqs
,
dtype
=
torch
.
int32
)
# Sync across attn TP ranks to make sure they have the same number of ready requests
torch
.
distributed
.
all_reduce
(
tensor
=
torch
.
tensor
(
num_ready_reqs
,
dtype
=
torch
.
int32
)
tensor
,
op
=
torch
.
distributed
.
ReduceOp
.
MAX
,
group
=
self
.
tp_cpu_group
torch
.
distributed
.
all_reduce
(
)
tensor
,
num_ready_reqs_max
=
tensor
.
item
()
op
=
torch
.
distributed
.
ReduceOp
.
MAX
,
for
i
in
range
(
num_ready_reqs
,
num_ready_reqs_max
):
group
=
self
.
attn_tp_cpu_group
,
self
.
grammar_queue
[
i
].
grammar
=
self
.
grammar_queue
[
i
].
grammar
.
result
()
)
num_ready_reqs
=
num_ready_reqs_max
num_ready_reqs_max
=
tensor
.
item
()
for
i
in
range
(
num_ready_reqs
,
num_ready_reqs_max
):
self
.
grammar_queue
[
i
].
grammar
=
self
.
grammar_queue
[
i
].
grammar
.
result
()
num_ready_reqs
=
num_ready_reqs_max
else
:
if
self
.
tp_size
>
1
:
# Sync across TP ranks to make sure they have the same number of ready requests
tensor
=
torch
.
tensor
(
num_ready_reqs
,
dtype
=
torch
.
int32
)
torch
.
distributed
.
all_reduce
(
tensor
,
op
=
torch
.
distributed
.
ReduceOp
.
MAX
,
group
=
self
.
tp_cpu_group
)
num_ready_reqs_max
=
tensor
.
item
()
for
i
in
range
(
num_ready_reqs
,
num_ready_reqs_max
):
self
.
grammar_queue
[
i
].
grammar
=
self
.
grammar_queue
[
i
].
grammar
.
result
()
num_ready_reqs
=
num_ready_reqs_max
self
.
waiting_queue
.
extend
(
self
.
grammar_queue
[:
num_ready_reqs
])
self
.
waiting_queue
.
extend
(
self
.
grammar_queue
[:
num_ready_reqs
])
self
.
grammar_queue
=
self
.
grammar_queue
[
num_ready_reqs
:]
self
.
grammar_queue
=
self
.
grammar_queue
[
num_ready_reqs
:]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment