Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0877f1e7
Unverified
Commit
0877f1e7
authored
Jul 07, 2024
by
Liangsheng Yin
Committed by
GitHub
Jul 07, 2024
Browse files
Fix streaming (#600)
parent
5304b4ef
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
5 additions
and
15 deletions
+5
-15
python/sglang/backend/runtime_endpoint.py
python/sglang/backend/runtime_endpoint.py
+0
-5
python/sglang/srt/managers/detokenizer_manager.py
python/sglang/srt/managers/detokenizer_manager.py
+3
-6
python/sglang/srt/managers/io_struct.py
python/sglang/srt/managers/io_struct.py
+0
-1
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+0
-1
python/sglang/srt/openai_api_adapter.py
python/sglang/srt/openai_api_adapter.py
+2
-2
No files found.
python/sglang/backend/runtime_endpoint.py
View file @
0877f1e7
...
@@ -180,7 +180,6 @@ class RuntimeEndpoint(BaseBackend):
...
@@ -180,7 +180,6 @@ class RuntimeEndpoint(BaseBackend):
self
.
_assert_success
(
res
)
self
.
_assert_success
(
res
)
pos
=
0
pos
=
0
incomplete_text
=
""
for
chunk
in
res
.
iter_lines
(
decode_unicode
=
False
):
for
chunk
in
res
.
iter_lines
(
decode_unicode
=
False
):
chunk
=
chunk
.
decode
(
"utf-8"
)
chunk
=
chunk
.
decode
(
"utf-8"
)
if
chunk
and
chunk
.
startswith
(
"data:"
):
if
chunk
and
chunk
.
startswith
(
"data:"
):
...
@@ -188,14 +187,10 @@ class RuntimeEndpoint(BaseBackend):
...
@@ -188,14 +187,10 @@ class RuntimeEndpoint(BaseBackend):
break
break
data
=
json
.
loads
(
chunk
[
5
:].
strip
(
"
\n
"
))
data
=
json
.
loads
(
chunk
[
5
:].
strip
(
"
\n
"
))
chunk_text
=
data
[
"text"
][
pos
:]
chunk_text
=
data
[
"text"
][
pos
:]
incomplete_text
=
data
[
"incomplete_text"
]
meta_info
=
data
[
"meta_info"
]
meta_info
=
data
[
"meta_info"
]
pos
+=
len
(
chunk_text
)
pos
+=
len
(
chunk_text
)
yield
chunk_text
,
meta_info
yield
chunk_text
,
meta_info
if
len
(
incomplete_text
)
>
0
:
yield
incomplete_text
,
meta_info
def
select
(
def
select
(
self
,
self
,
s
:
StreamExecutor
,
s
:
StreamExecutor
,
...
...
python/sglang/srt/managers/detokenizer_manager.py
View file @
0877f1e7
...
@@ -55,13 +55,11 @@ class DetokenizerManager:
...
@@ -55,13 +55,11 @@ class DetokenizerManager:
# Trim stop str
# Trim stop str
# TODO(lmzheng): handle the case where multiple stop strs are hit
# TODO(lmzheng): handle the case where multiple stop strs are hit
output_strs
=
[]
output_strs
=
[]
incomplete_strs
=
[]
for
i
in
range
(
len
(
recv_obj
.
rids
)):
for
i
in
range
(
len
(
recv_obj
.
rids
)):
new_text
=
read_texts
[
i
][
len
(
surr_texts
[
i
])
:]
new_text
=
read_texts
[
i
][
len
(
surr_texts
[
i
])
:]
complete_new_text
=
find_printable_text
(
new_text
)
if
recv_obj
.
finished_reason
[
i
]
is
None
:
incomplete_new_text
=
new_text
[
len
(
complete_new_text
)
:]
new_text
=
find_printable_text
(
new_text
)
output_strs
.
append
(
recv_obj
.
decoded_texts
[
i
]
+
complete_new_text
)
output_strs
.
append
(
recv_obj
.
decoded_texts
[
i
]
+
new_text
)
incomplete_strs
.
append
(
incomplete_new_text
)
if
isinstance
(
recv_obj
.
finished_reason
[
i
],
FINISH_MATCHED_STR
):
if
isinstance
(
recv_obj
.
finished_reason
[
i
],
FINISH_MATCHED_STR
):
pos
=
output_strs
[
i
].
find
(
recv_obj
.
finished_reason
[
i
].
matched
)
pos
=
output_strs
[
i
].
find
(
recv_obj
.
finished_reason
[
i
].
matched
)
...
@@ -72,7 +70,6 @@ class DetokenizerManager:
...
@@ -72,7 +70,6 @@ class DetokenizerManager:
BatchStrOut
(
BatchStrOut
(
rids
=
recv_obj
.
rids
,
rids
=
recv_obj
.
rids
,
output_strs
=
output_strs
,
output_strs
=
output_strs
,
incomplete_strs
=
incomplete_strs
,
meta_info
=
recv_obj
.
meta_info
,
meta_info
=
recv_obj
.
meta_info
,
finished_reason
=
recv_obj
.
finished_reason
,
finished_reason
=
recv_obj
.
finished_reason
,
)
)
...
...
python/sglang/srt/managers/io_struct.py
View file @
0877f1e7
...
@@ -123,7 +123,6 @@ class BatchTokenIDOut:
...
@@ -123,7 +123,6 @@ class BatchTokenIDOut:
class
BatchStrOut
:
class
BatchStrOut
:
rids
:
List
[
str
]
rids
:
List
[
str
]
output_strs
:
List
[
str
]
output_strs
:
List
[
str
]
incomplete_strs
:
List
[
str
]
meta_info
:
List
[
Dict
]
meta_info
:
List
[
Dict
]
finished_reason
:
List
[
BaseFinishReason
]
finished_reason
:
List
[
BaseFinishReason
]
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
0877f1e7
...
@@ -317,7 +317,6 @@ class TokenizerManager:
...
@@ -317,7 +317,6 @@ class TokenizerManager:
recv_obj
.
meta_info
[
i
][
"id"
]
=
rid
recv_obj
.
meta_info
[
i
][
"id"
]
=
rid
out_dict
=
{
out_dict
=
{
"text"
:
recv_obj
.
output_strs
[
i
],
"text"
:
recv_obj
.
output_strs
[
i
],
"incomplete_text"
:
recv_obj
.
incomplete_strs
[
i
],
"meta_info"
:
recv_obj
.
meta_info
[
i
],
"meta_info"
:
recv_obj
.
meta_info
[
i
],
}
}
state
.
out_list
.
append
(
out_dict
)
state
.
out_list
.
append
(
out_dict
)
...
...
python/sglang/srt/openai_api_adapter.py
View file @
0877f1e7
...
@@ -164,7 +164,7 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
...
@@ -164,7 +164,7 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
logprobs
=
None
logprobs
=
None
delta
=
text
[
len
(
stream_buffer
)
:]
delta
=
text
[
len
(
stream_buffer
)
:]
stream_buffer
=
content
[
"text"
]
stream_buffer
=
stream_buffer
+
delta
choice_data
=
CompletionResponseStreamChoice
(
choice_data
=
CompletionResponseStreamChoice
(
index
=
0
,
index
=
0
,
text
=
delta
,
text
=
delta
,
...
@@ -323,7 +323,7 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
...
@@ -323,7 +323,7 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
text
=
content
[
"text"
]
text
=
content
[
"text"
]
delta
=
text
[
len
(
stream_buffer
)
:]
delta
=
text
[
len
(
stream_buffer
)
:]
stream_buffer
=
text
stream_buffer
=
stream_buffer
+
delta
choice_data
=
ChatCompletionResponseStreamChoice
(
choice_data
=
ChatCompletionResponseStreamChoice
(
index
=
0
,
index
=
0
,
delta
=
DeltaMessage
(
content
=
delta
),
delta
=
DeltaMessage
(
content
=
delta
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment