sglang / Commits / eb7318f1
"docs/source/vscode:/vscode.git/clone" did not exist on "5598503aa097f3c368b9e0025e15e26b904e71f2"
Commit eb7318f1 (Unverified)

support tokenized batch request (#11091)

Authored by narutolhy on Oct 19, 2025; committed by GitHub on Oct 19, 2025
Parent: 6058fb52
1 changed file with 33 additions and 6 deletions:
python/sglang/srt/managers/tokenizer_manager.py (+33, -6)
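For context, here is a minimal client-side sketch of the kind of request this change targets: a batch in which every item is already tokenized (carries `input_ids` rather than raw text). It assumes a locally running sglang server on the default port and the native `/generate` HTTP endpoint; the token IDs and sampling parameters are placeholders.

    # Sketch: sending a pre-tokenized batch request to a running sglang server.
    # After this commit, a batch whose items all carry input_ids (no raw text)
    # is sent as a single batched request without invoking the tokenizer,
    # whether or not --enable-tokenizer-batch-encode is set.
    import requests

    payload = {
        # One list of token IDs per request in the batch (placeholder values).
        "input_ids": [
            [1, 3087, 2023, 318],
            [1, 9906, 1917],
        ],
        "sampling_params": {"max_new_tokens": 16, "temperature": 0.0},
    }

    resp = requests.post("http://localhost:30000/generate", json=payload)
    resp.raise_for_status()
    for out in resp.json():  # one output object per request in the batch
        print(out["text"])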
python/sglang/srt/managers/tokenizer_manager.py @ eb7318f1
@@ -759,6 +759,14 @@ class TokenizerManager(TokenizerCommunicatorMixin):
         """Handle batch tokenization for text inputs only."""
         logger.debug(f"Starting batch tokenization for {batch_size} text requests")

+        # If the batch has no text there is nothing to tokenize,
+        # so construct the return object directly.
+        if not self._batch_has_text(batch_size, obj):
+            # All requests already have input_ids, no need to tokenize
+            return [
+                await self._tokenize_one_request(obj[i]) for i in range(batch_size)
+            ]
+
         self._validate_batch_tokenization_constraints(batch_size, obj)

         # Collect requests and texts
         requests = [obj[i] for i in range(batch_size)]
         texts = [req.text for req in requests]
@@ -808,6 +816,30 @@ class TokenizerManager(TokenizerCommunicatorMixin):
                 "Batch tokenization is not needed for input_embeds. Do not set `enable_tokenizer_batch_encode`."
             )

+    def _batch_has_text(
+        self, batch_size: int, obj: Union[GenerateReqInput, EmbeddingReqInput]
+    ) -> bool:
+        """Check if any request in the batch contains text input."""
+        for i in range(batch_size):
+            if obj[i].text:
+                return True
+            elif self.is_generation and obj[i].contains_mm_input():
+                return True
+        return False
+
+    def _should_use_batch_tokenization(self, batch_size, requests) -> bool:
+        """Return True if we should run the tokenizer in batch mode.
+
+        Current policy:
+        - Respect explicit server flag `enable_tokenizer_batch_encode`.
+        - Or, if no request has text or multimodal input (all use pre-tokenized
+          input_ids or input_embeds), batch the requests without tokenization.
+        """
+        return batch_size > 0 and (
+            self.server_args.enable_tokenizer_batch_encode
+            or not self._batch_has_text(batch_size, requests)
+        )
+
     def _send_one_request(
         self,
         obj: Union[GenerateReqInput, EmbeddingReqInput],
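To make the dispatch policy above concrete, here is a self-contained sketch of the same decision logic with stand-in request objects. `StubReq`, `batch_has_text`, and `should_use_batch_tokenization` are illustrative names for this sketch, not sglang APIs, and multimodal input is omitted for brevity.

    # Standalone sketch mirroring TokenizerManager._should_use_batch_tokenization.
    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class StubReq:
        text: Optional[str] = None
        input_ids: Optional[List[int]] = None

    def batch_has_text(reqs: List[StubReq]) -> bool:
        # True if any request still carries raw text needing tokenization.
        return any(r.text for r in reqs)

    def should_use_batch_tokenization(enable_flag: bool, reqs: List[StubReq]) -> bool:
        # Batch when the server flag is set, or when nothing needs tokenizing.
        return len(reqs) > 0 and (enable_flag or not batch_has_text(reqs))

    # Pre-tokenized batch: batched even without the server flag.
    pretokenized = [StubReq(input_ids=[1, 2, 3]), StubReq(input_ids=[4, 5])]
    assert should_use_batch_tokenization(False, pretokenized)

    # Text batch: batched only if --enable-tokenizer-batch-encode is set.
    text_batch = [StubReq(text="hello"), StubReq(text="world")]
    assert not should_use_batch_tokenization(False, text_batch)
    assert should_use_batch_tokenization(True, text_batch)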
@@ -942,13 +974,8 @@ class TokenizerManager(TokenizerCommunicatorMixin):
         generators = []
         rids = []
         if getattr(obj, "parallel_sample_num", 1) == 1:
-            if self.server_args.enable_tokenizer_batch_encode:
-                # Validate batch tokenization constraints
-                self._validate_batch_tokenization_constraints(batch_size, obj)
+            if self._should_use_batch_tokenization(batch_size, obj):
                 tokenized_objs = await self._batch_tokenize_and_process(batch_size, obj)

                 # Send as a single batched request
                 self._send_batch_request(obj, tokenized_objs, created_time)

                 # Set up generators for each request in the batch
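Taken together, the three hunks move the per-batch validation (`_validate_batch_tokenization_constraints`) from the caller into `_batch_tokenize_and_process`, and change the dispatch test from the bare `enable_tokenizer_batch_encode` flag to `_should_use_batch_tokenization`, so a batch in which every request is already tokenized (`input_ids` or `input_embeds`) is sent as a single batched request even when the flag is unset.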