Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
7620cd37
Unverified
Commit
7620cd37
authored
Jul 19, 2024
by
Liangsheng Yin
Committed by
GitHub
Jul 19, 2024
Browse files
Fix jump forward when streaming (#665)
parent
50a53887
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
12 additions
and
2 deletions
+12
-2
python/sglang/srt/managers/controller/infer_batch.py
python/sglang/srt/managers/controller/infer_batch.py
+4
-0
python/sglang/srt/managers/controller/tp_worker.py
python/sglang/srt/managers/controller/tp_worker.py
+3
-0
python/sglang/srt/managers/detokenizer_manager.py
python/sglang/srt/managers/detokenizer_manager.py
+4
-2
python/sglang/srt/managers/io_struct.py
python/sglang/srt/managers/io_struct.py
+1
-0
No files found.
python/sglang/srt/managers/controller/infer_batch.py
View file @
7620cd37
...
@@ -90,6 +90,7 @@ class Req:
...
@@ -90,6 +90,7 @@ class Req:
# 1: surr_offset
# 1: surr_offset
# 2: read_offset
# 2: read_offset
# 3: last token
# 3: last token
self
.
vid
=
0
# version id to sync decode status with in detokenizer_manager
self
.
decoded_text
=
""
self
.
decoded_text
=
""
self
.
surr_offset
=
None
# Surrounding offset to defeat the cleanup algorithm
self
.
surr_offset
=
None
# Surrounding offset to defeat the cleanup algorithm
self
.
read_offset
=
None
self
.
read_offset
=
None
...
@@ -520,6 +521,9 @@ class Batch:
...
@@ -520,6 +521,9 @@ class Batch:
req
.
output_ids
=
cur_output_ids
req
.
output_ids
=
cur_output_ids
continue
continue
# The decode status has diverged from detokenizer_manager
req
.
vid
+=
1
# insert the old request into tree_cache
# insert the old request into tree_cache
if
req_pool_indices_cpu
is
None
:
if
req_pool_indices_cpu
is
None
:
req_pool_indices_cpu
=
self
.
req_pool_indices
.
tolist
()
req_pool_indices_cpu
=
self
.
req_pool_indices
.
tolist
()
...
...
python/sglang/srt/managers/controller/tp_worker.py
View file @
7620cd37
...
@@ -589,6 +589,7 @@ class ModelTpServer:
...
@@ -589,6 +589,7 @@ class ModelTpServer:
def
handle_finished_requests
(
self
,
batch
:
Batch
):
def
handle_finished_requests
(
self
,
batch
:
Batch
):
output_rids
=
[]
output_rids
=
[]
output_vids
=
[]
decoded_texts
=
[]
decoded_texts
=
[]
output_read_ids
=
[]
output_read_ids
=
[]
output_read_offsets
=
[]
output_read_offsets
=
[]
...
@@ -614,6 +615,7 @@ class ModelTpServer:
...
@@ -614,6 +615,7 @@ class ModelTpServer:
)
)
):
):
output_rids
.
append
(
req
.
rid
)
output_rids
.
append
(
req
.
rid
)
output_vids
.
append
(
req
.
vid
)
decoded_texts
.
append
(
req
.
decoded_text
)
decoded_texts
.
append
(
req
.
decoded_text
)
read_ids
,
read_offset
=
req
.
init_incremental_detokenize
()
read_ids
,
read_offset
=
req
.
init_incremental_detokenize
()
output_read_ids
.
append
(
read_ids
)
output_read_ids
.
append
(
read_ids
)
...
@@ -653,6 +655,7 @@ class ModelTpServer:
...
@@ -653,6 +655,7 @@ class ModelTpServer:
self
.
out_pyobjs
.
append
(
self
.
out_pyobjs
.
append
(
BatchTokenIDOut
(
BatchTokenIDOut
(
output_rids
,
output_rids
,
output_vids
,
decoded_texts
,
decoded_texts
,
output_read_ids
,
output_read_ids
,
output_read_offsets
,
output_read_offsets
,
...
...
python/sglang/srt/managers/detokenizer_manager.py
View file @
7620cd37
...
@@ -20,6 +20,7 @@ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
...
@@ -20,6 +20,7 @@ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
@
dataclasses
.
dataclass
@
dataclasses
.
dataclass
class
DecodeStatus
:
class
DecodeStatus
:
vid
:
int
decoded_text
:
str
decoded_text
:
str
decode_ids
:
List
[
int
]
decode_ids
:
List
[
int
]
surr_offset
:
int
surr_offset
:
int
...
@@ -53,13 +54,14 @@ class DetokenizerManager:
...
@@ -53,13 +54,14 @@ class DetokenizerManager:
assert
isinstance
(
recv_obj
,
BatchTokenIDOut
)
assert
isinstance
(
recv_obj
,
BatchTokenIDOut
)
bs
=
len
(
recv_obj
.
rids
)
bs
=
len
(
recv_obj
.
rids
)
# FIXME: incremental detokenize is not compatible with jump forward
# Initialize decode status
# Initialize decode status
read_ids
,
surr_ids
=
[],
[]
read_ids
,
surr_ids
=
[],
[]
for
i
in
range
(
bs
):
for
i
in
range
(
bs
):
rid
=
recv_obj
.
rids
[
i
]
rid
=
recv_obj
.
rids
[
i
]
if
rid
not
in
self
.
decode_status
:
vid
=
recv_obj
.
vids
[
i
]
if
rid
not
in
self
.
decode_status
or
self
.
decode_status
[
rid
].
vid
!=
vid
:
s
=
DecodeStatus
(
s
=
DecodeStatus
(
vid
=
vid
,
decoded_text
=
recv_obj
.
decoded_texts
[
i
],
decoded_text
=
recv_obj
.
decoded_texts
[
i
],
decode_ids
=
recv_obj
.
decode_ids
[
i
],
decode_ids
=
recv_obj
.
decode_ids
[
i
],
surr_offset
=
0
,
surr_offset
=
0
,
...
...
python/sglang/srt/managers/io_struct.py
View file @
7620cd37
...
@@ -111,6 +111,7 @@ class TokenizedGenerateReqInput:
...
@@ -111,6 +111,7 @@ class TokenizedGenerateReqInput:
@
dataclass
@
dataclass
class
BatchTokenIDOut
:
class
BatchTokenIDOut
:
rids
:
List
[
str
]
rids
:
List
[
str
]
vids
:
List
[
int
]
decoded_texts
:
List
[
str
]
decoded_texts
:
List
[
str
]
decode_ids
:
List
[
int
]
decode_ids
:
List
[
int
]
read_offsets
:
List
[
int
]
read_offsets
:
List
[
int
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment