Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
90876940
Unverified
Commit
90876940
authored
Feb 22, 2025
by
fzyzcjy
Committed by
GitHub
Feb 21, 2025
Browse files
Improve: Use TypeBasedDispatcher in DetokenizerManager (#3117)
parent
a3339d8c
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
112 additions
and
101 deletions
+112
-101
python/sglang/srt/managers/detokenizer_manager.py
python/sglang/srt/managers/detokenizer_manager.py
+112
-101
No files found.
python/sglang/srt/managers/detokenizer_manager.py
View file @
90876940
...
@@ -32,7 +32,11 @@ from sglang.srt.managers.io_struct import (
...
@@ -32,7 +32,11 @@ from sglang.srt.managers.io_struct import (
)
)
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.utils
import
configure_logger
,
get_zmq_socket
from
sglang.srt.utils
import
configure_logger
,
get_zmq_socket
from
sglang.utils
import
find_printable_text
,
get_exception_traceback
from
sglang.utils
import
(
TypeBasedDispatcher
,
find_printable_text
,
get_exception_traceback
,
)
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -83,6 +87,13 @@ class DetokenizerManager:
...
@@ -83,6 +87,13 @@ class DetokenizerManager:
self
.
decode_status
=
LimitedCapacityDict
(
capacity
=
DETOKENIZER_MAX_STATES
)
self
.
decode_status
=
LimitedCapacityDict
(
capacity
=
DETOKENIZER_MAX_STATES
)
self
.
_request_dispatcher
=
TypeBasedDispatcher
(
[
(
BatchEmbeddingOut
,
self
.
handle_batch_embedding_out
),
(
BatchTokenIDOut
,
self
.
handle_batch_token_id_out
),
]
)
def
trim_matched_stop
(
def
trim_matched_stop
(
self
,
output
:
Union
[
str
,
List
[
int
]],
finished_reason
:
Dict
,
no_stop_trim
:
bool
self
,
output
:
Union
[
str
,
List
[
int
]],
finished_reason
:
Dict
,
no_stop_trim
:
bool
):
):
...
@@ -111,14 +122,14 @@ class DetokenizerManager:
...
@@ -111,14 +122,14 @@ class DetokenizerManager:
while
True
:
while
True
:
recv_obj
=
self
.
recv_from_scheduler
.
recv_pyobj
()
recv_obj
=
self
.
recv_from_scheduler
.
recv_pyobj
()
output
=
self
.
_request_dispatcher
(
recv_obj
)
self
.
send_to_tokenizer
.
send_pyobj
(
output
)
if
isinstance
(
recv_obj
,
BatchEmbeddingOut
):
def
handle_batch_embedding_out
(
self
,
recv_obj
:
BatchEmbeddingOut
):
# If it is embedding model, no detokenization is needed.
# If it is embedding model, no detokenization is needed.
self
.
send_to_tokenizer
.
send_pyobj
(
recv_obj
)
return
recv_obj
continue
else
:
assert
isinstance
(
recv_obj
,
BatchTokenIDOut
)
def
handle_batch_token_id_out
(
self
,
recv_obj
:
BatchTokenIDOut
):
bs
=
len
(
recv_obj
.
rids
)
bs
=
len
(
recv_obj
.
rids
)
# Initialize decode status
# Initialize decode status
...
@@ -196,8 +207,7 @@ class DetokenizerManager:
...
@@ -196,8 +207,7 @@ class DetokenizerManager:
)
)
)
)
self
.
send_to_tokenizer
.
send_pyobj
(
out
=
BatchStrOut
(
BatchStrOut
(
rids
=
recv_obj
.
rids
,
rids
=
recv_obj
.
rids
,
finished_reasons
=
recv_obj
.
finished_reasons
,
finished_reasons
=
recv_obj
.
finished_reasons
,
output_strs
=
output_strs
,
output_strs
=
output_strs
,
...
@@ -215,12 +225,13 @@ class DetokenizerManager:
...
@@ -215,12 +225,13 @@ class DetokenizerManager:
output_top_logprobs_idx
=
recv_obj
.
output_top_logprobs_idx
,
output_top_logprobs_idx
=
recv_obj
.
output_top_logprobs_idx
,
output_hidden_states
=
recv_obj
.
output_hidden_states
,
output_hidden_states
=
recv_obj
.
output_hidden_states
,
)
)
)
# remove decodestatus for completed requests
# remove decodestatus for completed requests
for
rid
in
finished_reqs
:
for
rid
in
finished_reqs
:
self
.
decode_status
.
pop
(
rid
)
self
.
decode_status
.
pop
(
rid
)
return
out
class
LimitedCapacityDict
(
OrderedDict
):
class
LimitedCapacityDict
(
OrderedDict
):
def
__init__
(
self
,
capacity
:
int
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
capacity
:
int
,
*
args
,
**
kwargs
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment