Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
7620cd37
"vscode:/vscode.git/clone" did not exist on "7c9402d0d2a193edf6e0d376db7a7211a833d974"
Unverified
Commit
7620cd37
authored
Jul 19, 2024
by
Liangsheng Yin
Committed by
GitHub
Jul 19, 2024
Browse files
Fix jump forward when streaming (#665)
parent
50a53887
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
12 additions
and
2 deletions
+12
-2
python/sglang/srt/managers/controller/infer_batch.py
python/sglang/srt/managers/controller/infer_batch.py
+4
-0
python/sglang/srt/managers/controller/tp_worker.py
python/sglang/srt/managers/controller/tp_worker.py
+3
-0
python/sglang/srt/managers/detokenizer_manager.py
python/sglang/srt/managers/detokenizer_manager.py
+4
-2
python/sglang/srt/managers/io_struct.py
python/sglang/srt/managers/io_struct.py
+1
-0
No files found.
python/sglang/srt/managers/controller/infer_batch.py
View file @
7620cd37
...
...
@@ -90,6 +90,7 @@ class Req:
# 1: surr_offset
# 2: read_offset
# 3: last token
self
.
vid
=
0
# version id used to keep the decode status in sync with detokenizer_manager
self
.
decoded_text
=
""
self
.
surr_offset
=
None
# Surrounding offset to defeat the cleanup algorithm
self
.
read_offset
=
None
...
...
@@ -520,6 +521,9 @@ class Batch:
req
.
output_ids
=
cur_output_ids
continue
# The decode status has diverged from detokenizer_manager
req
.
vid
+=
1
# insert the old request into tree_cache
if
req_pool_indices_cpu
is
None
:
req_pool_indices_cpu
=
self
.
req_pool_indices
.
tolist
()
...
...
python/sglang/srt/managers/controller/tp_worker.py
View file @
7620cd37
...
...
@@ -589,6 +589,7 @@ class ModelTpServer:
def
handle_finished_requests
(
self
,
batch
:
Batch
):
output_rids
=
[]
output_vids
=
[]
decoded_texts
=
[]
output_read_ids
=
[]
output_read_offsets
=
[]
...
...
@@ -614,6 +615,7 @@ class ModelTpServer:
)
):
output_rids
.
append
(
req
.
rid
)
output_vids
.
append
(
req
.
vid
)
decoded_texts
.
append
(
req
.
decoded_text
)
read_ids
,
read_offset
=
req
.
init_incremental_detokenize
()
output_read_ids
.
append
(
read_ids
)
...
...
@@ -653,6 +655,7 @@ class ModelTpServer:
self
.
out_pyobjs
.
append
(
BatchTokenIDOut
(
output_rids
,
output_vids
,
decoded_texts
,
output_read_ids
,
output_read_offsets
,
...
...
python/sglang/srt/managers/detokenizer_manager.py
View file @
7620cd37
...
...
@@ -20,6 +20,7 @@ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
@
dataclasses
.
dataclass
class
DecodeStatus
:
vid
:
int
decoded_text
:
str
decode_ids
:
List
[
int
]
surr_offset
:
int
...
...
@@ -53,13 +54,14 @@ class DetokenizerManager:
assert
isinstance
(
recv_obj
,
BatchTokenIDOut
)
bs
=
len
(
recv_obj
.
rids
)
# FIXME: incremental detokenize is not compatible with jump forward
# Initialize decode status
read_ids
,
surr_ids
=
[],
[]
for
i
in
range
(
bs
):
rid
=
recv_obj
.
rids
[
i
]
if
rid
not
in
self
.
decode_status
:
vid
=
recv_obj
.
vids
[
i
]
if
rid
not
in
self
.
decode_status
or
self
.
decode_status
[
rid
].
vid
!=
vid
:
s
=
DecodeStatus
(
vid
=
vid
,
decoded_text
=
recv_obj
.
decoded_texts
[
i
],
decode_ids
=
recv_obj
.
decode_ids
[
i
],
surr_offset
=
0
,
...
...
python/sglang/srt/managers/io_struct.py
View file @
7620cd37
...
...
@@ -111,6 +111,7 @@ class TokenizedGenerateReqInput:
@
dataclass
class
BatchTokenIDOut
:
rids
:
List
[
str
]
vids
:
List
[
int
]
decoded_texts
:
List
[
str
]
decode_ids
:
List
[
int
]
read_offsets
:
List
[
int
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment