Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
cf470fea
"README_HIP.md" did not exist on "6a2a503e376028fcdc87b85c6e08a3daadc412a4"
Unverified
Commit
cf470fea
authored
Oct 20, 2024
by
Lianmin Zheng
Committed by
GitHub
Oct 20, 2024
Browse files
Make token mapping non-blocking in the overlapped mode (#1740)
parent
45d5af24
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
16 deletions
+7
-16
python/sglang/srt/managers/tp_worker_overlap_thread.py
python/sglang/srt/managers/tp_worker_overlap_thread.py
+6
-16
test/killall_sglang.sh
test/killall_sglang.sh
+1
-0
No files found.
python/sglang/srt/managers/tp_worker_overlap_thread.py
View file @
cf470fea
...
...
@@ -86,16 +86,15 @@ class TpModelWorkerClient:
@
torch
.
inference_mode
()
def
forward_thread_func_
(
self
):
while
True
:
tic1
=
time
.
time
()
model_worker_batch
,
future_token_ids_ct
=
self
.
input_queue
.
get
()
# Resolve future tokens in the input
tic2
=
time
.
time
()
resolved_input_ids
=
model_worker_batch
.
input_ids
future_mask
=
resolved_
input_ids
<
0
resolved_input_ids
[
future_mask
]
=
self
.
future_token_ids_map
[
-
resolved_input_ids
[
future_mask
]
]
input_ids
=
model_worker_batch
.
input_ids
input_ids
[:]
=
torch
.
where
(
input_ids
<
0
,
self
.
future_token_ids_map
[
torch
.
clamp
(
-
input_ids
,
min
=
0
)],
input_ids
,
)
# Run forward
logits_output
,
next_token_ids
=
self
.
worker
.
forward_batch_generation
(
...
...
@@ -119,15 +118,6 @@ class TpModelWorkerClient:
assert
logits_output
.
next_token_logprobs
is
None
,
"Not supported"
self
.
output_queue
.
put
((
None
,
next_token_ids
))
if
False
:
tic3
=
time
.
time
()
self
.
acc_time_with_waiting
+=
tic3
-
tic1
self
.
acc_time_without_waiting
+=
tic3
-
tic2
if
self
.
forward_queue
.
qsize
()
==
0
:
logger
.
info
(
f
"
{
self
.
acc_time_with_waiting
=
:.
3
f
}
,
{
self
.
acc_time_without_waiting
=
:.
3
f
}
,
{
self
.
forward_queue
.
qsize
()
=
}
"
)
def resulve_batch_result(self, bid: int):
    """Take the next entry from ``self.output_queue`` and return it as a pair.

    Args:
        bid: Batch id supplied by the caller. It is not consulted here; the
            next queued entry is returned regardless of its value.

    Returns:
        The ``(logits_output, next_token_ids)`` tuple read from the queue.
    """
    entry = self.output_queue.get()
    # Unpack explicitly so an entry that is not exactly two items fails here.
    logits, token_ids = entry
    return logits, token_ids
...
...
test/killall_sglang.sh
View file @
cf470fea
# Force-kill leftover sglang processes: multiprocessing spawn children first,
# then the server launcher.
for pattern in 'multiprocessing.spawn' 'sglang.launch_server'; do
    # PIDs of every process whose `ps aux` line matches the pattern; the grep
    # process itself is filtered out.
    pids=$(ps aux | grep "$pattern" | grep -v 'grep' | awk '{print $2}')
    # With no match the list is empty, and a bare `kill -9` would only print a
    # usage error and return non-zero, so skip it.
    if [ -n "$pids" ]; then
        # Intentionally unquoted: each PID must become its own argument.
        kill -9 $pids
    fi
done
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment